Browse Source
Reorganized shader files: FSR2 shaders are now copied directly into the project without alterations, Unity wrappers are located in an FSR2 folder.
mac-autoexp
Reorganized shader files: FSR2 shaders are now copied directly into the project without alterations, Unity wrappers are located in an FSR2 folder.
mac-autoexp
87 changed files with 7352 additions and 16 deletions
-
8Assets/Resources/FSR2.meta
-
2Assets/Resources/FSR2/ffx_fsr2_rcas_pass.compute
-
0Assets/Resources/FSR2/ffx_fsr2_rcas_pass.compute.meta
-
8Assets/Resources/FSR2/shaders.meta
-
0Assets/Resources/FSR2/shaders/ffx_common_types.h
-
2Assets/Resources/FSR2/shaders/ffx_common_types.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_core.h
-
2Assets/Resources/FSR2/shaders/ffx_core.h.meta
-
332Assets/Resources/FSR2/shaders/ffx_core_cpu.h
-
27Assets/Resources/FSR2/shaders/ffx_core_cpu.h.meta
-
1562Assets/Resources/FSR2/shaders/ffx_core_glsl.h
-
27Assets/Resources/FSR2/shaders/ffx_core_glsl.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h
-
2Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h
-
2Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_core_hlsl.h
-
2Assets/Resources/FSR2/shaders/ffx_core_hlsl.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_core_portability.h
-
2Assets/Resources/FSR2/shaders/ffx_core_portability.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_fsr1.h
-
2Assets/Resources/FSR2/shaders/ffx_fsr1.h.meta
-
263Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h.meta
-
99Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl.meta
-
93Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl.meta
-
91Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl.meta
-
85Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl.meta
-
695Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h
-
2Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_fsr2_common.h
-
2Assets/Resources/FSR2/shaders/ffx_fsr2_common.h.meta
-
188Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h.meta
-
171Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl.meta
-
164Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta
-
98Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h.meta
-
62Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl.meta
-
63Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl.meta
-
126Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h.meta
-
65Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl.meta
-
60Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl.meta
-
98Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h.meta
-
88Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color.h.meta
-
62Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color_pass.glsl.meta
-
64Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_prepare_input_color_pass.hlsl.meta
-
0Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h
-
2Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h.meta
-
92Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl.meta
-
3Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl
-
2Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl.meta
-
202Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta
-
68Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl.meta
-
70Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl
-
7Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta
-
125Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h.meta
-
0Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h
-
2Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h.meta
-
602Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h.meta
-
214Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h
-
27Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h.meta
-
936Assets/Resources/FSR2/shaders/ffx_spd.h
-
27Assets/Resources/FSR2/shaders/ffx_spd.h.meta
-
2Assets/Scripts/FSR2Thing.cs
@ -0,0 +1,8 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: cad7d53fa2166a0449bec7a9b4f17d69 |
||||
|
folderAsset: yes |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,8 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 4a24e63edc822264a871f58397325d51 |
||||
|
folderAsset: yes |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: a44469065afba1e40ab87f099c2c8848 |
|
||||
|
guid: c6da07d8aae05f04f87e4db20f84c73e |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 35579ac40cf103a49a29966f8d991a61 |
|
||||
|
guid: b37eb663a0ae01b469b0b5a54365b301 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -0,0 +1,332 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
/// A define for a true value in a boolean expression. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
#define FFX_TRUE (1) |
||||
|
|
||||
|
/// A define for a false value in a boolean expression. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
#define FFX_FALSE (0) |
||||
|
|
||||
|
#if !defined(FFX_STATIC) |
||||
|
/// A define to abstract declaration of static variables and functions. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
#define FFX_STATIC static |
||||
|
#endif // #if !defined(FFX_STATIC) |
||||
|
|
||||
|
#ifdef __clang__ |
||||
|
#pragma clang diagnostic ignored "-Wunused-variable" |
||||
|
#endif |
||||
|
|
||||
|
/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. |
||||
|
/// |
||||
|
/// @param [in] x A 32bit floating value. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    // Store the float into one union member and read the same storage back
    // through the integer member, yielding the raw bit pattern of x.
    union
    {
        FfxFloat32 asFloat;
        FfxUInt32  asUInt;
    } pun;

    pun.asFloat = x;
    return pun.asUInt;
}
||||
|
|
||||
|
FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
{
    // Dot product of the first two components of a and b, summed in component order.
    FfxFloat32 sum = a[0] * b[0];
    sum            = sum + a[1] * b[1];
    return sum;
}
||||
|
|
||||
|
FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
{
    // Dot product of the first three components of a and b, summed in component order.
    FfxFloat32 sum = a[0] * b[0];
    sum            = sum + a[1] * b[1];
    sum            = sum + a[2] * b[2];
    return sum;
}
||||
|
|
||||
|
FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
{
    // Dot product of the first four components of a and b, summed in component order.
    FfxFloat32 sum = a[0] * b[0];
    sum            = sum + a[1] * b[1];
    sum            = sum + a[2] * b[2];
    sum            = sum + a[3] * b[3];
    return sum;
}
||||
|
|
||||
|
/// Compute the linear interpolation between two values. |
||||
|
/// |
||||
|
/// Equivalent to the GLSL <c><i>mix</i></c> intrinsic function. Implements the |
||||
|
/// following math: |
||||
|
/// |
||||
|
/// (1 - t) * x + t * y |
||||
|
/// |
||||
|
/// @param [in] x The first value to lerp between. |
||||
|
/// @param [in] y The second value to lerp between. |
||||
|
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    // (1 - t) * x, written as x - x * t.
    const FfxFloat32 xContribution = -x * t + x;

    // Add the y share of the blend.
    return y * t + xContribution;
}
||||
|
|
||||
|
/// Compute the reciprocal of a value. |
||||
|
/// |
||||
|
/// @param [in] a The value to compute the reciprocal for. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The reciprocal value of <c><i>a</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
{
    // Plain single-precision division; no special handling when a is zero.
    const FfxFloat32 one = 1.0f;
    return one / a;
}
||||
|
|
||||
|
/// Compute the square root of a value. |
||||
|
/// |
||||
|
/// @param [in] x The value to compute the square root of. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The square root of <c><i>x</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
{
    // Delegates to the math library's sqrt and narrows the result to FfxFloat32.
    const FfxFloat32 root = sqrt(x);
    return root;
}
||||
|
|
||||
|
FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    // Reinterpret both operands as signed, shift right, then convert the result back to unsigned.
    // NOTE(review): right-shifting a negative signed value is implementation-defined before C++20;
    // this presumably relies on a sign-extending (arithmetic) shift - confirm for the target compilers.
    const FfxInt32 signedValue = FfxInt32(a);
    const FfxInt32 shiftAmount = FfxInt32(b);
    return FfxUInt32(signedValue >> shiftAmount);
}
||||
|
|
||||
|
/// Compute the fractional part of a decimal value. |
||||
|
/// |
||||
|
/// This function calculates <c><i>a - floor(a)</i></c>. |
||||
|
/// |
||||
|
/// @param [in] a The value to compute the fractional part from. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The fractional part of <c><i>a</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
{
    // Distance from a down to the nearest integer not greater than a.
    const FfxFloat32 fractionalPart = a - floor(a);
    return fractionalPart;
}
||||
|
|
||||
|
/// Compute the reciprocal square root of a value. |
||||
|
/// |
||||
|
/// @param [in] a The value to compute the reciprocal square root for. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The reciprocal square root value of <c><i>a</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
{
    // Composed from the two helpers: take the square root, then its reciprocal.
    const FfxFloat32 root = ffxSqrt(a);
    return ffxReciprocal(root);
}
||||
|
|
||||
|
FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
{
    // Returns x only when it compares strictly less than y; otherwise y
    // (so y is returned on equality or when the comparison is false).
    if (x < y)
    {
        return x;
    }
    return y;
}
||||
|
|
||||
|
FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
{
    // Unsigned overload: the smaller of the two values.
    if (x < y)
    {
        return x;
    }
    return y;
}
||||
|
|
||||
|
FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
{
    // Returns x only when it compares strictly greater than y; otherwise y
    // (so y is returned on equality or when the comparison is false).
    if (x > y)
    {
        return x;
    }
    return y;
}
||||
|
|
||||
|
FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
{
    // Unsigned overload: the larger of the two values.
    if (x > y)
    {
        return x;
    }
    return y;
}
||||
|
|
||||
|
/// Clamp a value to a [0..1] range. |
||||
|
/// |
||||
|
/// @param [in] a The value to clamp to [0..1] range. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The clamped version of <c><i>a</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
{
    // Apply the lower bound first, then the upper bound.
    const FfxFloat32 atLeastZero = ffxMax(0.0f, a);
    return ffxMin(1.0f, atLeastZero);
}
||||
|
|
||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||||
|
|
||||
|
FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    // d[i] = a[i] + b for each of the three components, in index order.
    // NOTE(review): presumably FfxFloat32x3 is an array typedef so the writes to d
    // reach the caller's storage - confirm against the type definition.
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] + b;
    }
}
||||
|
|
||||
|
FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    // Copy the three components of a into d, in index order.
    // NOTE(review): presumably FfxFloat32x3 is an array typedef so the writes to d
    // reach the caller's storage - confirm against the type definition.
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i];
    }
}
||||
|
|
||||
|
FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
{
    // Component-wise product: d[i] = a[i] * b[i] for the three components, in index order.
    // NOTE(review): presumably FfxFloat32x3 is an array typedef so the writes to d
    // reach the caller's storage - confirm against the type definition.
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] * b[i];
    }
}
||||
|
|
||||
|
FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
{
    // Scale by a scalar: d[i] = a[i] * b for the three components, in index order.
    // NOTE(review): presumably FfxFloat32x3 is an array typedef so the writes to d
    // reach the caller's storage - confirm against the type definition.
    for (int i = 0; i < 3; ++i)
    {
        d[i] = a[i] * b;
    }
}
||||
|
|
||||
|
FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
{
    // Component-wise reciprocal: d[i] = ffxReciprocal(a[i]) for the three components, in index order.
    // NOTE(review): presumably FfxFloat32x3 is an array typedef so the writes to d
    // reach the caller's storage - confirm against the type definition.
    for (int i = 0; i < 3; ++i)
    {
        d[i] = ffxReciprocal(a[i]);
    }
}
||||
|
|
||||
|
/// Convert FfxFloat32 to half (in lower 16-bits of output). |
||||
|
/// |
||||
|
/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf |
||||
|
/// |
||||
|
/// The function supports denormals. |
||||
|
/// |
||||
|
/// Some conversion rules are to make computations possibly "safer" on the GPU, |
||||
|
/// -INF & -NaN -> -65504 |
||||
|
/// +INF & +NaN -> +65504 |
||||
|
/// |
||||
|
/// @param [in] f The 32bit floating point value to convert. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// The closest 16bit floating point value to <c><i>f</i></c>. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
{
    // Table-driven float -> half conversion. Both tables are indexed by the top
    // 9 bits of the 32-bit pattern (sign bit plus 8 exponent bits), hence 512 entries.
    // They are read-only lookup data, so they are declared const.

    // base[i]: the half-precision sign/exponent bits (or the clamp value 0x7bff / 0xfbff
    // for inputs too large for a half, including the infinity/NaN exponent).
    static const FfxUInt16 base[512] = {
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
        0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
        0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
        0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
        0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
    };

    // shift[i]: how far the 23-bit float mantissa is shifted right before being added
    // to base[i]. A shift of 0x18 (24) discards the mantissa entirely.
    static const FfxUInt8 shift[512] = {
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
    };

    // Reinterpret the float's storage as a 32-bit unsigned integer.
    union
    {
        FfxFloat32 f;
        FfxUInt32  u;
    } bits;

    bits.f = f;
    FfxUInt32 u = bits.u;

    // Sign + exponent select the table row; always in [0, 511] for a 32-bit pattern.
    FfxUInt32 i = u >> 23;

    // The mantissa's low bits are dropped by the shift (truncation, no rounding).
    return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
}
||||
|
|
||||
|
/// Pack 2x32-bit floating point values in a single 32bit value. |
||||
|
/// |
||||
|
/// This function first converts each component of <c><i>a</i></c> into a 16-bit floating |
||||
|
/// point representation (via f32tof16), and then stores the X and Y components in the lower and upper 16 bits of the |
||||
|
/// 32bit unsigned integer respectively. |
||||
|
/// |
||||
|
/// @param [in] a A 2-dimensional floating point value to convert and pack. |
||||
|
/// |
||||
|
/// @returns |
||||
|
/// A packed 32bit value containing 2 16bit floating point values. |
||||
|
/// |
||||
|
/// @ingroup CPU |
||||
|
FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
{
    // First component goes in the low 16 bits, second component in the high 16 bits.
    const FfxUInt32 lowHalf  = f32tof16(a[0]);
    const FfxUInt32 highHalf = f32tof16(a[1]) << 16;
    return lowHalf + highHalf;
}
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 9d1d6ed5c9da0c64b882f3ebc2bac307 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
1562
Assets/Resources/FSR2/shaders/ffx_core_glsl.h
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 8b1e08364d8608e4ea7f4372e5b2de67 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 041095789ff5b314fae434ff885e389d |
|
||||
|
guid: 402c509393f5bf647b41a962a48ed8e2 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: d16a1a4953abf534caaa4bd8843de607 |
|
||||
|
guid: 142bf3947ada43541a0f31a328fdec07 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: d9d43e208984ec94facb71623986b1cf |
|
||||
|
guid: 89d6e02f97594f64ca2da4c8124df6cf |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 99bc6885ecf87554aae6119a96eaaf6c |
|
||||
|
guid: 5d6e692075988194382122bac7819f02 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 7d1bd748bf4775847ac7cec07191796a |
|
||||
|
guid: 7524e42f73f97f34bbeb414ea412a808 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -0,0 +1,263 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_ACCUMULATE_H |
||||
|
#define FFX_FSR2_ACCUMULATE_H |
||||
|
|
||||
|
#define FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES 1 |
||||
|
|
||||
|
FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
{
    // Scale the motion vector component-wise by DisplaySize() and return the
    // length of the scaled vector.
    const FfxFloat32x2 fScaledMotion = fMotionVector * DisplaySize();
    return length(fScaledMotion);
}
||||
|
#if FFX_HALF |
||||
|
FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
{
    // Half-precision overload: DisplaySize() is converted to FFX_MIN16_F2 before
    // scaling the motion vector; the length of the scaled vector is returned.
    const FFX_MIN16_F2 fDisplaySize = FFX_MIN16_F2(DisplaySize());
    return length(fMotionVector * fDisplaySize);
}
||||
|
#endif |
||||
|
|
||||
|
void Accumulate(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT FfxFloat32x4 fHistory, FFX_PARAMETER_IN FfxFloat32x4 fUpsampled, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, FFX_PARAMETER_IN FfxFloat32 fHrVelocity)
{
    // Blends the upsampled sample into the history color and updates the history
    // weight stored in fHistory.w. iPxHrPos and fDepthClipFactor are not read here.

    // Total weight after adding this frame's sample.
    fHistory.w += fUpsampled.w;

    // The upsampled color is converted with YCoCgToRGB before blending.
    const FfxFloat32x3 fUpsampledRgb = YCoCgToRGB(fUpsampled.rgb);

    // Blend factor: this frame's share of the accumulated weight.
    const FfxFloat32 fAlpha = fUpsampled.w / fHistory.w;
    fHistory.rgb = ffxLerp(fHistory.rgb, fUpsampledRgb, fAlpha);

    // Cap the accumulated weight; the cap moves from MaxAccumulationWeight() towards
    // accumulationMaxOnMotion as fHrVelocity * 10 goes from 0 to 1.
    const FfxFloat32 fMotionBlend = ffxSaturate(fHrVelocity * 10.0f);
    FfxFloat32 fMaxAverageWeight = FfxFloat32(ffxLerp(MaxAccumulationWeight(), accumulationMaxOnMotion, fMotionBlend));
    fHistory.w = ffxMin(fHistory.w, fMaxAverageWeight);
}
||||
|
|
||||
|
// Pulls the history color back towards the clipping box when it lies outside of it.
//
// clippingBox                 Box data (boxCenter, boxVec, aabbMin, aabbMax) used for the clamp.
// fHistory                    History sample; only .xyz is modified, and only when it is outside the box.
// fDepthClipFactor            Squared and multiplied into the amount of history that is kept.
// fLumaStabilityFactor,
// fLockContributionThisFrame  The larger of the two is added to the amount of history that is kept.
// fLockStatus, fLuminanceDiff, fUpsampleWeight are not read by this function.
void RectifyHistory(
    RectificationBoxData clippingBox,
    FFX_PARAMETER_INOUT FfxFloat32x4 fHistory,
    FFX_PARAMETER_IN FfxFloat32x3 fLockStatus,
    FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor,
    FFX_PARAMETER_IN FfxFloat32 fLumaStabilityFactor,
    FFX_PARAMETER_IN FfxFloat32 fLuminanceDiff,
    FFX_PARAMETER_IN FfxFloat32 fUpsampleWeight,
    FFX_PARAMETER_IN FfxFloat32 fLockContributionThisFrame)
{
    // Box scale grows with (1 / DownscaleFactor().x) - 1.
    FfxFloat32 fScaleFactorInfluence = FfxFloat32(1.0f / DownscaleFactor().x - 1);
    FfxFloat32 fBoxScale = FfxFloat32(1.0f) + (FfxFloat32(0.5f) * fScaleFactorInfluence);

    FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
    FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
    FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
    FfxFloat32x3 boxCenter = clippingBox.boxCenter;

    // The scaled box never extends beyond the AABB.
    boxMin = ffxMax(clippingBox.aabbMin, boxMin);
    boxMax = ffxMin(clippingBox.aabbMax, boxMax);

    // Per-channel distance by which the history sample lies outside [boxMin, boxMax].
    FfxFloat32x3 distToClampOutside = ffxMax(ffxMax(FfxFloat32x3(0, 0, 0), boxMin - fHistory.xyz), ffxMax(FfxFloat32x3(0, 0, 0), fHistory.xyz - boxMax));

    if (any(FFX_GREATER_THAN(distToClampOutside, FfxFloat32x3(0, 0, 0)))) {

        const FfxFloat32x3 clampedHistorySample = clamp(fHistory.xyz, boxMin, boxMax);

        // Per-channel ratio of the clamped distance to the original distance from the
        // box center; zero where the history already sits on the center (avoids 0 / 0).
        FfxFloat32x3 clippedHistoryToBoxCenter = abs(clampedHistorySample - boxCenter);
        FfxFloat32x3 historyToBoxCenter = abs(fHistory.xyz - boxCenter);
        FfxFloat32x3 HistoryColorWeight;
        HistoryColorWeight.x = historyToBoxCenter.x > FfxFloat32(0) ? clippedHistoryToBoxCenter.x / historyToBoxCenter.x : FfxFloat32(0.0f);
        HistoryColorWeight.y = historyToBoxCenter.y > FfxFloat32(0) ? clippedHistoryToBoxCenter.y / historyToBoxCenter.y : FfxFloat32(0.0f);
        HistoryColorWeight.z = historyToBoxCenter.z > FfxFloat32(0) ? clippedHistoryToBoxCenter.z / historyToBoxCenter.z : FfxFloat32(0.0f);

        FfxFloat32x3 fHistoryContribution = HistoryColorWeight;

        // only lock luma
        fHistoryContribution += ffxMax(fLockContributionThisFrame, fLumaStabilityFactor).xxx;
        fHistoryContribution *= (fDepthClipFactor * fDepthClipFactor);

        // Contribution 0 keeps the clamped sample, 1 keeps the unclamped history.
        fHistory.xyz = ffxLerp(clampedHistorySample.xyz, fHistory.xyz, ffxSaturate(fHistoryContribution));
    }
}
||||
|
|
||||
|
// Writes the final upscaled color for display pixel iPxHrPos.
// Thin forwarding wrapper around the StoreUpscaledOutput() callback.
void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
{
    StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
}
||||
|
|
||||
|
// Returns the luma stability factor sampled at fHrUv, zeroed out when the
// pixel velocity fHrVelocity is 0.1 or more.
FfxFloat32 GetLumaStabilityFactor(FfxFloat32x2 fHrUv, FfxFloat32 fHrVelocity)
{
    // Only apply on still pixels: the luma history resource would have to be
    // reprojected for this factor to be meaningful under motion.
    const FfxFloat32 fStillMask = FfxFloat32(fHrVelocity < 0.1f);

    return SampleLumaStabilityFactor(fHrUv) * fStillMask;
}
||||
|
|
||||
|
// Returns this frame's lock contribution in [0, 1]: four times the normalized
// remaining lock lifetime, saturated.
// fUvCoord, fAccumulationMask and fParticleMask are accepted but not read here.
FfxFloat32 GetLockContributionThisFrame(FfxFloat32x2 fUvCoord, FfxFloat32 fAccumulationMask, FfxFloat32 fParticleMask, FfxFloat32x3 fLockStatus)
{
    // Rectify on lock frame
    return ffxSaturate(GetNormalizedRemainingLockLifetime(fLockStatus) * FfxFloat32(4));
}
||||
|
|
||||
|
// Updates the lock status for display pixel iPxHrPos from this frame's
// upsample weight and stores it: trust goes up (capped at 1), remaining
// lifetime goes down (floored at 0).
void FinalizeLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x3 fLockStatus, FfxFloat32 fUpsampledWeight)
{
    // Increase trust. The divisor is fixed, so the increase is the same no
    // matter the MaxAccumulationWeight() value.
    const FfxFloat32 fTrustDivisor = FfxFloat32(12);
    const FfxFloat32 fTrustGain = FfxFloat32(fUpsampledWeight / fTrustDivisor);
    const FfxFloat32 fRaisedTrust = fLockStatus[LOCK_TRUST] + fTrustGain;
    fLockStatus[LOCK_TRUST] = ffxMin(FfxFloat32(1), fRaisedTrust);

    // Decrease lock lifetime
    const FfxFloat32 fLifetimeDivisor = FfxFloat32(JitterSequenceLength()) * FfxFloat32(averageLanczosWeightPerFrame);
    const FfxFloat32 fLifetimeLoss = FfxFloat32(fUpsampledWeight / fLifetimeDivisor);
    const FfxFloat32 fLoweredLifetime = fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeLoss;
    fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLoweredLifetime);

    StoreLockStatus(iPxHrPos, fLockStatus);
}
||||
|
|
||||
|
// Computes the ceiling applied to the history accumulation weight.
// The result is MaxAccumulationWeight() scaled by the smallest of four
// limits (reactivity, motion, depth clip, luminance change). A lock that is
// new this frame and was not locked the previous frame returns
// accumulationMaxOnMotion instead.
FfxFloat32 ComputeMaxAccumulationWeight(FfxFloat32 fHrVelocity, FfxFloat32 fReactiveMax, FfxFloat32 fDepthClipFactor, FfxFloat32 fLuminanceDiff, LockState lockState)
{
    // Lower bound used by the motion and luminance limits.
    const FfxFloat32 fFloor = FfxFloat32(accumulationMaxOnMotion) / FfxFloat32(MaxAccumulationWeight());

    const FfxFloat32 fReactiveLimit = FfxFloat32(1) - fReactiveMax;
    const FfxFloat32 fMotionLimit = ffxLerp(FfxFloat32(1), fFloor, ffxSaturate(fHrVelocity * FfxFloat32(10)));
    const FfxFloat32 fDepthClipLimit = fDepthClipFactor;
    const FfxFloat32 fLuminanceLimit = ffxSaturate(ffxMax(fFloor, FfxFloat32(1) - fLuminanceDiff));

    const FfxFloat32 fReactiveOrMotion = ffxMin(fReactiveLimit, fMotionLimit);
    const FfxFloat32 fDepthOrLuminance = ffxMin(fDepthClipLimit, fLuminanceLimit);
    const FfxFloat32 fMaxAccumulation = FfxFloat32(MaxAccumulationWeight()) * ffxMin(fReactiveOrMotion, fDepthOrLuminance);

    if (lockState.NewLock && !lockState.WasLockedPrevFrame) {
        return FfxFloat32(accumulationMaxOnMotion);
    }
    return fMaxAccumulation;
}
||||
|
|
||||
|
// Computes the per-axis upsampling kernel weight, capped at 1.99.
// The weight starts at 1 and is pushed toward 1 / DownscaleFactor() as the
// history weight grows and reactivity falls, then is scaled into
// [0.5, 1] x itself by the depth clip factor.
FfxFloat32x2 ComputeKernelWeight(in FfxFloat32 fHistoryWeight, in FfxFloat32 fDepthClipFactor, in FfxFloat32 fReactivityFactor)
{
    const FfxFloat32 fSizeBias = ffxSaturate(ffxMax(FfxFloat32(0), fHistoryWeight - FfxFloat32(0.5)) / FfxFloat32(3));
    const FfxFloat32 fNonReactive = FfxFloat32(1) - fReactivityFactor;

    const FfxFloat32x2 fUpscaleDelta = FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1);
    FfxFloat32x2 fWeight = FfxFloat32(1) + fUpscaleDelta * FfxFloat32(fSizeBias) * fNonReactive;

    // Use an averaged value on disocclusion: this reduces the importance of
    // high-value samples while waiting for accumulation to kick in.
    const FfxFloat32x2 fHalf = FfxFloat32x2(0.5f, 0.5f);
    fWeight *= fHalf + fDepthClipFactor * fHalf;

    return ffxMin(FfxFloat32x2(1.99f, 1.99f), fWeight);
}
||||
|
|
||||
|
// Accumulation pass entry point for one display-resolution pixel.
//
// Gathers the per-pixel inputs (motion vector, depth clip, luma stability,
// dilated reactive masks), reprojects the history color and lock status,
// clamps the history weight, upsamples the current frame, rectifies the
// history against the clipping box and accumulates. The result is written
// through FinalizeLockStatus() and StoreInternalColorAndWeight(), and also
// through WriteUpscaledOutput() when FFX_FSR2_OPTION_APPLY_SHARPENING is 0.
//
// iPxHrPos: integer pixel coordinate at display (high) resolution.
void Accumulate(FfxInt32x2 iPxHrPos)
{
    const FfxFloat32x2 fSamplePosHr = iPxHrPos + 0.5f;
    const FfxFloat32x2 fPxLrPos = fSamplePosHr * DownscaleFactor(); // Source resolution output pixel center position

    const FfxFloat32x2 fLrUvJittered = (fPxLrPos + Jitter()) / RenderSize();

    const FfxFloat32x2 fHrUv = fSamplePosHr / DisplaySize();
    const FfxFloat32x2 fMotionVector = GetMotionVector(iPxHrPos, fHrUv);

    const FfxFloat32 fHrVelocity = GetPxHrVelocity(fMotionVector);
    const FfxFloat32 fDepthClipFactor = ffxSaturate(SampleDepthClip(fLrUvJittered));
    const FfxFloat32 fLumaStabilityFactor = GetLumaStabilityFactor(fHrUv, fHrVelocity);
    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered);
    const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x;
    const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y;
    const FfxBoolean bIsResetFrame = (0 == FrameIndex());

    // History defaults to "no history"; it is only filled in when the
    // reprojected sample exists and this is not a reset frame.
    FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0);
    FfxFloat32x3 fLockStatus;
    InitializeNewLockSample(fLockStatus);
    FfxBoolean bIsExistingSample = FFX_TRUE;

    FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0);
    ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample);

    if (bIsExistingSample && !bIsResetFrame) {
        ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight);
        ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus);
    }

    FfxFloat32 fLuminanceDiff = FfxFloat32(0.0f);

    LockState lockState = PostProcessLockStatus(iPxHrPos, fLrUvJittered, FfxFloat32(fDepthClipFactor), fAccumulationMask, fHrVelocity, fHistoryColorAndWeight.w, fLockStatus, fLuminanceDiff);

    fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, ComputeMaxAccumulationWeight(
        FfxFloat32(fHrVelocity), fReactiveMax, FfxFloat32(fDepthClipFactor), FfxFloat32(fLuminanceDiff), lockState
    ));

    // Kill accumulation based on shading change
    fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, FfxFloat32(ffxMax(0.0f, MaxAccumulationWeight() * ffxPow(FfxFloat32(1) - fLuminanceDiff, 2.0f / 1.0f))));

    // Load upsampled input color
    RectificationBoxData clippingBox;

    // No trust in reactive areas
    fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(1.0f) - FfxFloat32(pow(fReactiveMax, 1.0f / 3.0f)));
    fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(fDepthClipFactor));

    FfxFloat32x2 fKernelWeight = ComputeKernelWeight(fHistoryColorAndWeight.w, FfxFloat32(fDepthClipFactor), ffxMax((FfxFloat32(1) - fLockStatus[LOCK_TRUST]), fReactiveMax));

    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(iPxHrPos, fKernelWeight, clippingBox);

#if FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES
    // Make sure all samples have same weight on reset/first frame. Upsampled weight should never be 0.0f when history accumulation is 0.0f.
    fUpsampledColorAndWeight.w = (fHistoryColorAndWeight.w == 0.0f) ? ffxMax(FSR2_EPSILON, fUpsampledColorAndWeight.w) : fUpsampledColorAndWeight.w;
#endif

    FfxFloat32 fLockContributionThisFrame = GetLockContributionThisFrame(fHrUv, fAccumulationMask, fReactiveMax, fLockStatus);

    // Update accumulation and rectify history
    if (fHistoryColorAndWeight.w > FfxFloat32(0)) {

        RectifyHistory(clippingBox, fHistoryColorAndWeight, fLockStatus, FfxFloat32(fDepthClipFactor), FfxFloat32(fLumaStabilityFactor), FfxFloat32(fLuminanceDiff), fUpsampledColorAndWeight.w, fLockContributionThisFrame);

        fHistoryColorAndWeight.rgb = YCoCgToRGB(fHistoryColorAndWeight.rgb);
    }

    Accumulate(iPxHrPos, fHistoryColorAndWeight, fUpsampledColorAndWeight, fDepthClipFactor, fHrVelocity);

    // Subtract accumulation weight in reactive areas
    fHistoryColorAndWeight.w -= fUpsampledColorAndWeight.w * fReactiveMax;

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fHistoryColorAndWeight.rgb = InverseTonemap(fHistoryColorAndWeight.rgb);
#endif
    fHistoryColorAndWeight.rgb /= FfxFloat32(Exposure());

    FinalizeLockStatus(iPxHrPos, fLockStatus, fUpsampledColorAndWeight.w);

    StoreInternalColorAndWeight(iPxHrPos, fHistoryColorAndWeight);

    // Output final color when RCAS is disabled
#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
    WriteUpscaledOutput(iPxHrPos, fHistoryColorAndWeight.rgb);
#endif
}
||||
|
|
||||
|
#endif // FFX_FSR2_ACCUMULATE_H |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: a04cb2522aaff1045869a272ed129964 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,99 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 5 |
||||
|
// SRV 4 : FSR2_Exposure : r_exposure |
||||
|
// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask |
||||
|
// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors |
||||
|
// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color |
||||
|
// SRV 11 : FSR2_LockStatus2 : r_lock_status |
||||
|
// SRV 12 : FSR2_DepthClip : r_depth_clip |
||||
|
// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color |
||||
|
// SRV 14 : FSR2_LumaHistory : r_luma_history |
||||
|
// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut |
||||
|
// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut |
||||
|
// SRV 27 : FSR2_DilatedReactiveMasks : r_dilated_reactive_masks |
||||
|
// SRV 28 : FSR2_ExposureMips : r_imgMips |
||||
|
// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color |
||||
|
// UAV 11 : FSR2_LockStatus1 : rw_lock_status |
||||
|
// UAV 18 : DisplayOutput : rw_upscaled_output |
||||
|
// CB 0 : cbFSR2 |
||||
|
// CB 1 : FSR2DispatchOffsets |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_EXPOSURE 0 |
||||
|
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 |
||||
|
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS |
||||
|
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 |
||||
|
#else |
||||
|
#define FSR2_BIND_SRV_MOTION_VECTORS 2 |
||||
|
#endif |
||||
|
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 |
||||
|
#define FSR2_BIND_SRV_LOCK_STATUS 4 |
||||
|
#define FSR2_BIND_SRV_DEPTH_CLIP 5 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 |
||||
|
#define FSR2_BIND_SRV_LUMA_HISTORY 7 |
||||
|
#define FSR2_BIND_SRV_LANCZOS_LUT 8 |
||||
|
#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 |
||||
|
#define FSR2_BIND_SRV_EXPOSURE_MIPS 10 |
||||
|
#define FSR2_BIND_UAV_INTERNAL_UPSCALED 11 |
||||
|
#define FSR2_BIND_UAV_LOCK_STATUS 12 |
||||
|
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 13 |
||||
|
|
||||
|
#define FSR2_BIND_CB_FSR2 14 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_upsample.h" |
||||
|
#include "ffx_fsr2_postprocess_lock_status.h" |
||||
|
#include "ffx_fsr2_reproject.h" |
||||
|
#include "ffx_fsr2_accumulate.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
|
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Compute entry point of the accumulate pass (GLSL).
// Work-group rows are processed in reverse order: group row y is remapped to
// (GroupRows - 1 - y) before the per-pixel coordinate is derived.
FFX_FSR2_NUM_THREADS
void main()
{
    // Number of work-group rows needed to cover the display height (rounded up).
    const uint uGroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT;

    const uvec2 uFlippedGroupId = uvec2(gl_WorkGroupID.x, uGroupRows - gl_WorkGroupID.y - 1);
    const uvec2 uGroupSize = uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    const uvec2 uPixel = uFlippedGroupId * uGroupSize + gl_LocalInvocationID.xy;

    Accumulate(ivec2(uPixel));
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: a47992654ac46784d976db0091b79aeb |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,93 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 5 |
||||
|
// SRV 4 : FSR2_Exposure : r_exposure |
||||
|
// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask |
||||
|
// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors |
||||
|
// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color |
||||
|
// SRV 11 : FSR2_LockStatus2 : r_lock_status |
||||
|
// SRV 12 : FSR2_DepthClip : r_depth_clip |
||||
|
// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color |
||||
|
// SRV 14 : FSR2_LumaHistory : r_luma_history |
||||
|
// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut |
||||
|
// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut |
||||
|
// SRV 27 : FSR2_DilatedReactiveMasks : r_dilated_reactive_masks |
||||
|
// SRV 28 : FSR2_ExposureMips : r_imgMips |
||||
|
// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color |
||||
|
// UAV 11 : FSR2_LockStatus1 : rw_lock_status |
||||
|
// UAV 18 : DisplayOutput : rw_upscaled_output |
||||
|
// CB 0 : cbFSR2 |
||||
|
// CB 1 : FSR2DispatchOffsets |
||||
|
|
||||
|
#define FSR2_BIND_SRV_EXPOSURE 0 |
||||
|
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS |
||||
|
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 |
||||
|
#else |
||||
|
#define FSR2_BIND_SRV_MOTION_VECTORS 2 |
||||
|
#endif |
||||
|
#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 |
||||
|
#define FSR2_BIND_SRV_LOCK_STATUS 4 |
||||
|
#define FSR2_BIND_SRV_DEPTH_CLIP 5 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 |
||||
|
#define FSR2_BIND_SRV_LUMA_HISTORY 7 |
||||
|
#define FSR2_BIND_SRV_LANCZOS_LUT 8 |
||||
|
#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 |
||||
|
#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 10 |
||||
|
#define FSR2_BIND_SRV_EXPOSURE_MIPS 11 |
||||
|
#define FSR2_BIND_UAV_INTERNAL_UPSCALED 0 |
||||
|
#define FSR2_BIND_UAV_LOCK_STATUS 1 |
||||
|
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 |
||||
|
|
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_upsample.h" |
||||
|
#include "ffx_fsr2_postprocess_lock_status.h" |
||||
|
#include "ffx_fsr2_reproject.h" |
||||
|
#include "ffx_fsr2_accumulate.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Compute entry point of the accumulate pass (HLSL).
// Thread-group rows are processed in reverse order: group row y is remapped to
// (GroupRows - 1 - y) before the per-pixel coordinate is derived.
FFX_FSR2_NUM_THREADS
FFX_FSR2_EMBED_ROOTSIG_CONTENT
void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
{
    // Number of thread-group rows needed to cover the display height (rounded up).
    const uint uGroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT;

    const uint2 uFlippedGroupId = uint2(uGroupId.x, uGroupRows - uGroupId.y - 1);
    const uint2 uGroupSize = uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    uint2 uPixel = uFlippedGroupId * uGroupSize + uGroupThreadId;

    Accumulate(uPixel);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 356ec46d3f01672428b5a7a0de727548 |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,91 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 |
||||
|
#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 |
||||
|
#define FSR2_BIND_UAV_REACTIVE 2 |
||||
|
#define FSR2_BIND_CB_REACTIVE 3 |
||||
|
#define FSR2_BIND_CB_FSR2 4 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
|
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha; |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; |
||||
|
|
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Constants for the auto-generate-reactive pass (std140 layout; member order
// and types must not change).
layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
{
    float   scale;          // multiplier applied to the pre/post-alpha color delta
    float   threshold;      // cut-off used when the APPLY_THRESHOLD flag is set
    float   binaryValue;    // value written when the scaled delta is not below threshold
    uint    flags;          // bitmask tested against FFX_FSR2_AUTOREACTIVEFLAGS_*
} cbGenerateReactive;
||||
|
|
||||
|
// Compute entry point of the auto-generate-reactive pass (GLSL).
// For each pixel, measures the difference between the pre-alpha and
// post-alpha input colors and stores it in the reactive mask, optionally
// tonemapped, scaled and thresholded according to cbGenerateReactive.flags.
FFX_FSR2_NUM_THREADS
void main()
{
    const FfxInt32x2 iPxPos = FfxInt32x2(gl_GlobalInvocationID.xy);
    const uint uFlags = cbGenerateReactive.flags;

    FfxFloat32x3 fPreAlpha  = texelFetch(r_input_color_pre_alpha, iPxPos, 0).rgb;
    FfxFloat32x3 fPostAlpha = texelFetch(r_input_color_post_alpha, iPxPos, 0).rgb;

    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0)
    {
        fPreAlpha  = Tonemap(fPreAlpha);
        fPostAlpha = Tonemap(fPostAlpha);
    }

    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0)
    {
        fPreAlpha  = InverseTonemap(fPreAlpha);
        fPostAlpha = InverseTonemap(fPostAlpha);
    }

    const FfxFloat32x3 fDelta = abs(fPostAlpha - fPreAlpha);

    // Collapse the per-channel delta to a scalar: largest component or vector length.
    FfxFloat32 fReactive;
    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) != 0)
    {
        fReactive = max(fDelta.x, max(fDelta.y, fDelta.z));
    }
    else
    {
        fReactive = length(fDelta);
    }
    fReactive *= cbGenerateReactive.scale;

    // Optional binarization: 0 below the threshold, binaryValue otherwise.
    if ((uFlags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) != 0)
    {
        if (fReactive < cbGenerateReactive.threshold)
        {
            fReactive = FfxFloat32(0);
        }
        else
        {
            fReactive = cbGenerateReactive.binaryValue;
        }
    }

    imageStore(rw_output_reactive_mask, iPxPos, vec4(fReactive));
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 9b3697948343bfb42ac4fbc18d2fb8be |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,85 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 |
||||
|
#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 |
||||
|
#define FSR2_BIND_UAV_REACTIVE 0 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
|
||||
|
Texture2D<float4> r_input_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PRE_ALPHA_COLOR); |
||||
|
Texture2D<float4> r_input_color_post_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_POST_ALPHA_COLOR); |
||||
|
RWTexture2D<float> rw_output_reactive_mask : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_REACTIVE); |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Constants for the auto-generate-reactive pass (member order and types must
// not change).
cbuffer cbGenerateReactive : register(b0)
{
    float   scale;          // multiplier applied to the pre/post-alpha color delta
    float   threshold;      // cut-off used when the APPLY_THRESHOLD flag is set
    float   binaryValue;    // value written when the scaled delta is not below threshold
    uint    flags;          // bitmask tested against FFX_FSR2_AUTOREACTIVEFLAGS_*
};
||||
|
|
||||
|
// Compute entry point of the auto-generate-reactive pass (HLSL).
// For each pixel, measures the difference between the pre-alpha and
// post-alpha input colors and stores it in the reactive mask, optionally
// tonemapped, scaled and thresholded according to `flags`.
FFX_FSR2_NUM_THREADS
FFX_FSR2_EMBED_ROOTSIG_CONTENT
void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
{
    const uint2 uGroupSize = uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    uint2 uPixel = uGroupId * uGroupSize + uGroupThreadId;

    float3 fPreAlpha  = r_input_color_pre_alpha[uPixel].rgb;
    float3 fPostAlpha = r_input_color_post_alpha[uPixel].rgb;

    if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP)
    {
        fPreAlpha  = Tonemap(fPreAlpha);
        fPostAlpha = Tonemap(fPostAlpha);
    }

    if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP)
    {
        fPreAlpha  = InverseTonemap(fPreAlpha);
        fPostAlpha = InverseTonemap(fPostAlpha);
    }

    const float3 fDelta = abs(fPostAlpha - fPreAlpha);

    // Collapse the per-channel delta to a scalar: largest component or vector length.
    float fReactive;
    if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)
    {
        fReactive = max(fDelta.x, max(fDelta.y, fDelta.z));
    }
    else
    {
        fReactive = length(fDelta);
    }
    fReactive *= scale;

    // Optional binarization: 0 below the threshold, binaryValue otherwise.
    if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)
    {
        if (fReactive < threshold)
        {
            fReactive = 0;
        }
        else
        {
            fReactive = binaryValue;
        }
    }

    rw_output_reactive_mask[uPixel] = fReactive;
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: cc76bd6f46792f3418a56b79eb5c959b |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,695 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
#include "ffx_fsr2_resources.h" |
||||
|
|
||||
|
#if defined(FFX_GPU) |
||||
|
#include "ffx_core.h" |
||||
|
#endif // #if defined(FFX_GPU) |
||||
|
|
||||
|
#if defined(FFX_GPU) |
||||
|
#ifndef FFX_FSR2_PREFER_WAVE64 |
||||
|
#define FFX_FSR2_PREFER_WAVE64 |
||||
|
#endif // #if defined(FFX_GPU) |
||||
|
|
||||
|
#if defined(FSR2_BIND_CB_FSR2) |
||||
|
layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t |
||||
|
{ |
||||
|
FfxInt32x2 iRenderSize; |
||||
|
FfxInt32x2 iDisplaySize; |
||||
|
FfxInt32x2 uLumaMipDimensions; |
||||
|
FfxInt32 uLumaMipLevelToUse; |
||||
|
FfxInt32 uFrameIndex; |
||||
|
FfxFloat32x2 fDisplaySizeRcp; |
||||
|
FfxFloat32x2 fJitter; |
||||
|
FfxFloat32x4 fDeviceToViewDepth; |
||||
|
FfxFloat32x2 depthclip_uv_scale; |
||||
|
FfxFloat32x2 postprocessed_lockstatus_uv_scale; |
||||
|
FfxFloat32x2 reactive_mask_dim_rcp; |
||||
|
FfxFloat32x2 MotionVectorScale; |
||||
|
FfxFloat32x2 fDownscaleFactor; |
||||
|
FfxFloat32 fPreExposure; |
||||
|
FfxFloat32 fTanHalfFOV; |
||||
|
FfxFloat32x2 fMotionVectorJitterCancellation; |
||||
|
FfxFloat32 fJitterSequenceLength; |
||||
|
FfxFloat32 fLockInitialLifetime; |
||||
|
FfxFloat32 fLockTickDelta; |
||||
|
FfxFloat32 fDeltaTime; |
||||
|
FfxFloat32 fDynamicResChangeFactor; |
||||
|
FfxFloat32 fLumaMipRcp; |
||||
|
} cbFSR2; |
||||
|
#endif |
||||
|
|
||||
|
FfxFloat32 LumaMipRcp() |
||||
|
{ |
||||
|
return cbFSR2.fLumaMipRcp; |
||||
|
} |
||||
|
|
||||
|
FfxInt32x2 LumaMipDimensions() |
||||
|
{ |
||||
|
return cbFSR2.uLumaMipDimensions; |
||||
|
} |
||||
|
|
||||
|
FfxInt32 LumaMipLevelToUse() |
||||
|
{ |
||||
|
return cbFSR2.uLumaMipLevelToUse; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 DownscaleFactor() |
||||
|
{ |
||||
|
return cbFSR2.fDownscaleFactor; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 Jitter() |
||||
|
{ |
||||
|
return cbFSR2.fJitter; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 MotionVectorJitterCancellation() |
||||
|
{ |
||||
|
return cbFSR2.fMotionVectorJitterCancellation; |
||||
|
} |
||||
|
|
||||
|
FfxInt32x2 RenderSize() |
||||
|
{ |
||||
|
return cbFSR2.iRenderSize; |
||||
|
} |
||||
|
|
||||
|
FfxInt32x2 DisplaySize() |
||||
|
{ |
||||
|
return cbFSR2.iDisplaySize; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 DisplaySizeRcp() |
||||
|
{ |
||||
|
return cbFSR2.fDisplaySizeRcp; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 JitterSequenceLength() |
||||
|
{ |
||||
|
return cbFSR2.fJitterSequenceLength; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LockInitialLifetime() |
||||
|
{ |
||||
|
return cbFSR2.fLockInitialLifetime; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LockTickDelta() |
||||
|
{ |
||||
|
return cbFSR2.fLockTickDelta; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 DeltaTime() |
||||
|
{ |
||||
|
return cbFSR2.fDeltaTime; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 MaxAccumulationWeight() |
||||
|
{ |
||||
|
const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples |
||||
|
|
||||
|
return 12; //32.0f * averageLanczosWeightPerFrame; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 DynamicResChangeFactor() |
||||
|
{ |
||||
|
return cbFSR2.fDynamicResChangeFactor; |
||||
|
} |
||||
|
|
||||
|
FfxInt32 FrameIndex() |
||||
|
{ |
||||
|
return cbFSR2.uFrameIndex; |
||||
|
} |
||||
|
|
||||
|
layout (set = 0, binding = 0) uniform sampler s_PointClamp; |
||||
|
layout (set = 0, binding = 1) uniform sampler s_LinearClamp; |
||||
|
|
||||
|
// SRVs |
||||
|
#if defined(FSR2_BIND_SRV_INPUT_COLOR) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_MOTION_VECTORS) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_MOTION_VECTORS) uniform texture2D r_motion_vectors; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_DEPTH) uniform texture2D r_depth; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_EXPOSURE) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE) uniform texture2D r_exposure; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_LOCK_STATUS) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH_CLIP) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_DEPTH_CLIP) uniform texture2D r_depth_clip; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_RCAS_INPUT) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_LANCZOS_LUT) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE_MIPS) uniform texture2D r_imgMips; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut; |
||||
|
#endif |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) |
||||
|
layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; |
||||
|
#endif |
||||
|
|
||||
|
// UAV |
||||
|
#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_DILATED_DEPTH |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_LOCK_STATUS |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_DEPTH_CLIP |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) uniform image2D rw_prepared_input_color; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_LUMA_HISTORY |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_EXPOSURE |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; |
||||
|
#endif |
||||
|
#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC |
||||
|
layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; |
||||
|
#endif |
||||
|
|
||||
|
FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) |
||||
|
return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
|
||||
|
FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; |
||||
|
return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
// |
||||
|
// a 0 0 0 x |
||||
|
// 0 b 0 0 y |
||||
|
// 0 0 c d z |
||||
|
// 0 0 e 0 1 |
||||
|
// |
||||
|
// z' = (z*c+d)/(z*e) |
||||
|
// z' = (c/e) + d/(z*e) |
||||
|
// z' - (c/e) = d/(z*e) |
||||
|
// (z'e - c)/e = d/(z*e) |
||||
|
// e / (z'e - c) = (z*e)/d |
||||
|
// (e * d) / (z'e - c) = z*e |
||||
|
// z = d / (z'e - c) |
||||
|
FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) |
||||
|
{ |
||||
|
return -cbFSR2.fDeviceToViewDepth[2] / (fDeviceDepth * cbFSR2.fDeviceToViewDepth[1] - cbFSR2.fDeviceToViewDepth[0]); |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH) |
||||
|
return texelFetch(r_depth, iPxPos, 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) |
||||
|
return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 GatherReactiveMask(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_REACTIVE_MASK) |
||||
|
return textureGather(sampler2D(r_reactive_mask, s_LinearClamp), FfxFloat32x2(iPxPos) * cbFSR2.reactive_mask_dim_rcp, 0); |
||||
|
#else |
||||
|
return FfxFloat32x4(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadTransparencyAndCompositionMask(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) |
||||
|
return texelFetch(r_transparency_and_composition_mask, iPxPos, 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; |
||||
|
return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0f).x; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 PreExposure() |
||||
|
{ |
||||
|
return cbFSR2.fPreExposure; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_INPUT_COLOR) |
||||
|
return texelFetch(r_input_color_jittered, iPxPos, 0).rgb / PreExposure(); |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 LoadInputColorWithoutPreExposure(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_INPUT_COLOR) |
||||
|
return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) |
||||
|
return texelFetch(r_prepared_input_color, iPxPos, 0).rgb; |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadPreparedInputColorLuma(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) |
||||
|
return texelFetch(r_prepared_input_color, iPxPos, 0).a; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_MOTION_VECTORS) |
||||
|
FfxFloat32x2 fSrcMotionVector = texelFetch(r_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; |
||||
|
#else |
||||
|
FfxFloat32x2 fSrcMotionVector = FfxFloat32x2(0.f); |
||||
|
#endif |
||||
|
|
||||
|
FfxFloat32x2 fUvMotionVector = fSrcMotionVector * cbFSR2.MotionVectorScale; |
||||
|
|
||||
|
#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS |
||||
|
fUvMotionVector -= cbFSR2.fMotionVectorJitterCancellation; |
||||
|
#endif |
||||
|
|
||||
|
return fUvMotionVector; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) |
||||
|
return texelFetch(r_internal_upscaled_color, iPxHistory, 0); |
||||
|
#else |
||||
|
return FfxFloat32x4(0.0f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) |
||||
|
return imageLoad(rw_internal_upscaled_color, iPxPos); |
||||
|
#else |
||||
|
return FfxFloat32x4(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_LUMA_HISTORY) |
||||
|
imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 LoadRwLumaHistory(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_LUMA_HISTORY) |
||||
|
return imageLoad(rw_luma_history, FfxInt32x2(iPxPos)); |
||||
|
#else |
||||
|
return FfxFloat32x4(1.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadLumaStabilityFactor(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) |
||||
|
return texelFetch(r_luma_history, FfxInt32x2(iPxPos), 0).w; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_LUMA_HISTORY) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; |
||||
|
return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f).w; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) |
||||
|
imageStore(rw_internal_upscaled_color, iPxHistory, fHistory); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) |
||||
|
imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) |
||||
|
imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor * PreExposure(), 1.f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 LoadLockStatus(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_LOCK_STATUS) |
||||
|
FfxFloat32x3 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rgb; |
||||
|
|
||||
|
fLockStatus[0] -= LockInitialLifetime() * 2.0f; |
||||
|
|
||||
|
return fLockStatus; |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 LoadRwLockStatus(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_LOCK_STATUS) |
||||
|
FfxFloat32x3 fLockStatus = imageLoad(rw_lock_status, iPxPos).rgb; |
||||
|
|
||||
|
fLockStatus[0] -= LockInitialLifetime() * 2.0f; |
||||
|
|
||||
|
return fLockStatus; |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x3 fLockstatus) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_LOCK_STATUS) |
||||
|
fLockstatus[0] += LockInitialLifetime() * 2.0f; |
||||
|
|
||||
|
imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) |
||||
|
imageStore(rw_prepared_input_color, iPxPos, fTonemapped); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxBoolean IsResponsivePixel(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
return FFX_FALSE; //not supported in prototype |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadDepthClip(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH_CLIP) |
||||
|
return texelFetch(r_depth_clip, iPxPos, 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH_CLIP) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; |
||||
|
return textureLod(sampler2D(r_depth_clip, s_LinearClamp), fUV, 0.0f).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x3 SampleLockStatus(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_LOCK_STATUS) |
||||
|
fUV *= cbFSR2.postprocessed_lockstatus_uv_scale; |
||||
|
FfxFloat32x3 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rgb; |
||||
|
fLockStatus[0] -= LockInitialLifetime() * 2.0f; |
||||
|
return fLockStatus; |
||||
|
#else |
||||
|
return FfxFloat32x3(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreDepthClip(FfxInt32x2 iPxPos, FfxFloat32 fClip) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_DEPTH_CLIP) |
||||
|
imageStore(rw_depth_clip, iPxPos, vec4(fClip, 0.0f, 0.0f, 0.0f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 TanHalfFoV() |
||||
|
{ |
||||
|
return cbFSR2.fTanHalfFOV; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DEPTH) |
||||
|
return texelFetch(r_depth, iPxInput, 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) |
||||
|
return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r); |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) |
||||
|
{ |
||||
|
FfxUInt32 uDepth = floatBitsToUint(fDepth); |
||||
|
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) |
||||
|
#if FFX_FSR2_OPTION_INVERTED_DEPTH |
||||
|
imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); |
||||
|
#else |
||||
|
imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth |
||||
|
#endif |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) |
||||
|
imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_DILATED_DEPTH) |
||||
|
//FfxUInt32 uDepth = f32tof16(fDepth); |
||||
|
imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) |
||||
|
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) |
||||
|
return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg; |
||||
|
#else |
||||
|
return FfxFloat32x2(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) |
||||
|
return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg; |
||||
|
#else |
||||
|
return FfxFloat32x2(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_DEPTH) |
||||
|
return texelFetch(r_dilatedDepth, iPxInput, 0).r; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 Exposure() |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_EXPOSURE) |
||||
|
FfxFloat32 exposure = texelFetch(r_exposure, FfxInt32x2(0,0), 0).x; |
||||
|
#else |
||||
|
FfxFloat32 exposure = 1.f; |
||||
|
#endif |
||||
|
|
||||
|
if (exposure == 0.0f) { |
||||
|
exposure = 1.0f; |
||||
|
} |
||||
|
|
||||
|
return exposure; |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_LANCZOS_LUT) |
||||
|
return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x; |
||||
|
#else |
||||
|
return 0.f; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) |
||||
|
// Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. |
||||
|
return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r); |
||||
|
#else |
||||
|
return FfxFloat32(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) |
||||
|
fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) |
||||
|
return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg; |
||||
|
#else |
||||
|
return FfxFloat32x2(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) |
||||
|
return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg; |
||||
|
#else |
||||
|
return FfxFloat32x2(0.f); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) |
||||
|
{ |
||||
|
#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) |
||||
|
imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f)); |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
|
||||
|
#endif // #if defined(FFX_GPU) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 5cb9ad224bca34e4fbc3acf33bd82fbe |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 5c9c193ec7dc3064097ab1f74232c98d |
|
||||
|
guid: eb121968296f9ba44b35d7e18d2b79df |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 9cdba182d33eab646b8df7521a7ec649 |
|
||||
|
guid: 2176dca22b6e9604da8329c79abae68d |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -0,0 +1,188 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
FFX_GROUPSHARED FfxUInt32 spdCounter; |
||||
|
|
||||
|
#ifndef SPD_PACKED_ONLY |
||||
|
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; |
||||
|
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; |
||||
|
FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; |
||||
|
FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; |
||||
|
|
||||
|
FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) |
||||
|
{ |
||||
|
FfxFloat32x3 fRgb = LoadInputColor(FfxInt32x2(tex)); |
||||
|
|
||||
|
FFX_STATIC const FfxFloat32x3 rgb2y = FfxFloat32x3(0.2126, 0.7152, 0.0722); |
||||
|
|
||||
|
//compute log luma |
||||
|
const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, dot(rgb2y, fRgb))); |
||||
|
|
||||
|
// Make sure out of screen pixels contribute no value to the end result |
||||
|
const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; |
||||
|
|
||||
|
return FfxFloat32x4(result, 0, 0, 0); |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) |
||||
|
{ |
||||
|
return SPD_LoadMipmap5(tex); |
||||
|
} |
||||
|
|
||||
|
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) |
||||
|
{ |
||||
|
if (index == LumaMipLevelToUse() || index == 5) |
||||
|
{ |
||||
|
SPD_SetMipmap(pix, index, outValue.r); |
||||
|
} |
||||
|
|
||||
|
if (index == MipCount() - 1) { //accumulate on 1x1 level |
||||
|
|
||||
|
if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) |
||||
|
{ |
||||
|
FfxFloat32 prev = SPD_LoadExposureBuffer().y; |
||||
|
FfxUInt32x2 renderSize = SPD_RenderSize(); |
||||
|
FfxFloat32 result = outValue.r / (renderSize.x * renderSize.y); |
||||
|
|
||||
|
if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values |
||||
|
{ |
||||
|
FfxFloat32 rate = 1.0f; |
||||
|
result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); |
||||
|
} |
||||
|
FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); |
||||
|
SPD_SetExposureBuffer(spdOutput); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
void SpdIncreaseAtomicCounter(FfxUInt32 slice) |
||||
|
{ |
||||
|
SPD_IncreaseAtomicCounter(spdCounter); |
||||
|
} |
||||
|
|
||||
|
FfxUInt32 SpdGetAtomicCounter() |
||||
|
{ |
||||
|
return spdCounter; |
||||
|
} |
||||
|
|
||||
|
void SpdResetAtomicCounter(FfxUInt32 slice) |
||||
|
{ |
||||
|
SPD_ResetAtomicCounter(); |
||||
|
} |
||||
|
|
||||
|
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) |
||||
|
{ |
||||
|
return FfxFloat32x4( |
||||
|
spdIntermediateR[x][y], |
||||
|
spdIntermediateG[x][y], |
||||
|
spdIntermediateB[x][y], |
||||
|
spdIntermediateA[x][y]); |
||||
|
} |
||||
|
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) |
||||
|
{ |
||||
|
spdIntermediateR[x][y] = value.x; |
||||
|
spdIntermediateG[x][y] = value.y; |
||||
|
spdIntermediateB[x][y] = value.z; |
||||
|
spdIntermediateA[x][y] = value.w; |
||||
|
} |
||||
|
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) |
||||
|
{ |
||||
|
return (v0 + v1 + v2 + v3); |
||||
|
} |
||||
|
#endif |
||||
|
|
||||
|
// define fetch and store functions Packed |
||||
|
#if FFX_HALF |
||||
|
#error Callback must be implemented |
||||
|
|
||||
|
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; |
||||
|
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; |
||||
|
|
||||
|
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) |
||||
|
{ |
||||
|
return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]); |
||||
|
} |
||||
|
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) |
||||
|
{ |
||||
|
return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]); |
||||
|
} |
||||
|
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) |
||||
|
{ |
||||
|
if (index == LumaMipLevelToUse() || index == 5) |
||||
|
{ |
||||
|
imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value); |
||||
|
return; |
||||
|
} |
||||
|
imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value); |
||||
|
} |
||||
|
void SpdIncreaseAtomicCounter(FfxUInt32 slice) |
||||
|
{ |
||||
|
InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter); |
||||
|
} |
||||
|
FfxUInt32 SpdGetAtomicCounter() |
||||
|
{ |
||||
|
return spdCounter; |
||||
|
} |
||||
|
void SpdResetAtomicCounter(FfxUInt32 slice) |
||||
|
{ |
||||
|
rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0; |
||||
|
} |
||||
|
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) |
||||
|
{ |
||||
|
return FfxFloat16x4( |
||||
|
spdIntermediateRG[x][y].x, |
||||
|
spdIntermediateRG[x][y].y, |
||||
|
spdIntermediateBA[x][y].x, |
||||
|
spdIntermediateBA[x][y].y); |
||||
|
} |
||||
|
// Splits `value` into two packed pairs and stores them at [x][y]:
// .xy goes to spdIntermediateRG, .zw goes to spdIntermediateBA.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
    // The two arrays are distinct, so the store order does not matter.
    spdIntermediateBA[x][y] = value.zw;
    spdIntermediateRG[x][y] = value.xy;
}
||||
|
// Half-precision reduction callback: sums the four inputs component-wise
// and scales the sum by 0.25.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
    FfxFloat16x4 hSum = v0 + v1 + v2 + v3;
    return hSum * FfxFloat16(0.25);
}
||||
|
#endif |
||||
|
|
||||
|
#include "ffx_spd.h" |
||||
|
|
||||
|
// Entry point of the luminance pyramid pass: forwards the work-group id,
// the local thread index and the SPD constants (MipCount, NumWorkGroups,
// WorkGroupOffset) to SpdDownsampleH when FFX_HALF is set, otherwise to
// SpdDownsample.
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
    // Both precision paths take exactly the same argument list, so it is
    // assembled once up front.
    FfxUInt32x2 spdGroupXY     = FfxUInt32x2(WorkGroupId.xy);
    FfxUInt32   spdThreadIndex = FfxUInt32(LocalThreadIndex);
    FfxUInt32   spdMipCount    = FfxUInt32(MipCount());
    FfxUInt32   spdGroupCount  = FfxUInt32(NumWorkGroups());
    FfxUInt32   spdGroupZ      = FfxUInt32(WorkGroupId.z);
    FfxUInt32x2 spdGroupOffset = FfxUInt32x2(WorkGroupOffset());

#if FFX_HALF
    SpdDownsampleH(spdGroupXY, spdThreadIndex, spdMipCount, spdGroupCount, spdGroupZ, spdGroupOffset);
#else
    SpdDownsample(spdGroupXY, spdThreadIndex, spdMipCount, spdGroupCount, spdGroupZ, spdGroupOffset);
#endif
}
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: dbcdb6dfb36311a49aa7b05bc5054280 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,171 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_INPUT_COLOR 0 |
||||
|
#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE 4 |
||||
|
#define FSR2_BIND_CB_FSR2 5 |
||||
|
#define FSR2_BIND_CB_SPD 6 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
|
||||
|
#if defined(FSR2_BIND_CB_SPD) |
||||
|
// Constant block for the SPD pass, bound at FSR2_BIND_CB_SPD in set 1.
// Member order is part of the std140 layout and must not change.
layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
{
    uint  mips;             // returned by MipCount()
    uint  numWorkGroups;    // returned by NumWorkGroups()
    uvec2 workGroupOffset;  // returned by WorkGroupOffset()
    uvec2 renderSize;       // returned by SPD_RenderSize()
} cbSPD;
||||
|
|
||||
|
// Accessor for cbSPD.mips.
uint MipCount()
{
    uint uMips = cbSPD.mips;
    return uMips;
}
||||
|
|
||||
|
// Accessor for cbSPD.numWorkGroups.
uint NumWorkGroups()
{
    uint uGroupCount = cbSPD.numWorkGroups;
    return uGroupCount;
}
||||
|
|
||||
|
// Accessor for cbSPD.workGroupOffset.
uvec2 WorkGroupOffset()
{
    uvec2 uOffset = cbSPD.workGroupOffset;
    return uOffset;
}
||||
|
|
||||
|
// Accessor for cbSPD.renderSize.
uvec2 SPD_RenderSize()
{
    uvec2 uSize = cbSPD.renderSize;
    return uSize;
}
||||
|
#else |
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: the cbSPD block is
// not declared, so this always reports zero.
uint MipCount()
{
    return 0u;
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always reports zero.
uint NumWorkGroups()
{
    return 0u;
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always returns a
// zero offset.
uvec2 WorkGroupOffset()
{
    return uvec2(0u, 0u);
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always returns a
// zero size.
uvec2 SPD_RenderSize()
{
    return uvec2(0u, 0u);
}
||||
|
#endif |
||||
|
|
||||
|
// Reads the .xy components of texel (0, 0) of rw_exposure.
// Returns vec2(0) when FSR2_BIND_UAV_EXPOSURE is not defined.
vec2 SPD_LoadExposureBuffer()
{
    vec2 fExposure = vec2(0);
#if defined(FSR2_BIND_UAV_EXPOSURE)
    fExposure = imageLoad(rw_exposure, ivec2(0, 0)).xy;
#endif
    return fExposure;
}
||||
|
|
||||
|
// Stores `value` in the .xy components of texel (0, 0) of rw_exposure; the
// remaining two components are written as 0. Does nothing when
// FSR2_BIND_UAV_EXPOSURE is not defined.
void SPD_SetExposureBuffer(vec2 value)
{
#if defined(FSR2_BIND_UAV_EXPOSURE)
    vec4 fTexel = vec4(value, 0.0f, 0.0f);
    imageStore(rw_exposure, ivec2(0, 0), fTexel);
#endif
}
||||
|
|
||||
|
// Reads the first component of rw_img_mip_5 at iPxPos and returns it in .x
// with the other components zeroed. Returns vec4(0) when
// FSR2_BIND_UAV_EXPOSURE_MIP_5 is not defined.
vec4 SPD_LoadMipmap5(ivec2 iPxPos)
{
    vec4 fTexel = vec4(0);
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
    fTexel = vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
#endif
    return fTexel;
}
||||
|
|
||||
|
// Stores `value` at iPxPos in the image selected by `slice`:
//   FFX_FSR2_SHADING_CHANGE_MIP_LEVEL -> rw_img_mip_shading_change
//   5                                 -> rw_img_mip_5
// For any other `slice` the current texel of one bound image is read and
// written back unchanged. Each branch exists only when its binding macro
// is defined.
void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
{
    // Only the first component carries data; the rest are zero-filled.
    vec4 fTexel = vec4(value, 0.0f, 0.0f, 0.0f);

    switch (slice)
    {
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
    case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
        imageStore(rw_img_mip_shading_change, iPxPos, fTexel);
        break;
#endif
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
    case 5:
        imageStore(rw_img_mip_5, iPxPos, fTexel);
        break;
#endif
    default:
        // avoid flattened side effect
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
        {
            float fCurrent = imageLoad(rw_img_mip_shading_change, iPxPos).x;
            imageStore(rw_img_mip_shading_change, iPxPos, vec4(fCurrent, 0.0f, 0.0f, 0.0f));
        }
#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
        {
            float fCurrent = imageLoad(rw_img_mip_5, iPxPos).x;
            imageStore(rw_img_mip_5, iPxPos, vec4(fCurrent, 0.0f, 0.0f, 0.0f));
        }
#endif
        break;
    }
}
||||
|
|
||||
|
// Atomically adds 1 to texel (0, 0) of rw_spd_global_atomic and stores the
// value returned by imageAtomicAdd in spdCounter. When
// FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC is not defined, spdCounter is left as is.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC)
    ivec2 iCounterTexel = ivec2(0, 0);
    spdCounter = imageAtomicAdd(rw_spd_global_atomic, iCounterTexel, 1);
#endif
}
||||
|
|
||||
|
// Writes zero to texel (0, 0) of rw_spd_global_atomic. Does nothing when
// FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC is not defined.
void SPD_ResetAtomicCounter()
{
#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC)
    ivec2 iCounterTexel = ivec2(0, 0);
    imageStore(rw_spd_global_atomic, iCounterTexel, uvec4(0));
#endif
}
||||
|
|
||||
|
#include "ffx_fsr2_compute_luminance_pyramid.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 256 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
// Shader entry point: passes the work-group id and the flattened local
// invocation index to ComputeAutoExposure.
void main()
{
    uvec3 uGroupId    = gl_WorkGroupID.xyz;
    uint  uLocalIndex = gl_LocalInvocationIndex;
    ComputeAutoExposure(uGroupId, uLocalIndex);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 43b69b4a78d09164aa834576507445c1 |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,164 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#define FSR2_BIND_SRV_INPUT_COLOR 0 |
||||
|
#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 0 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE_MIP_5 2 |
||||
|
#define FSR2_BIND_UAV_EXPOSURE 3 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
#define FSR2_BIND_CB_SPD 1 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
|
||||
|
#if defined(FSR2_BIND_CB_SPD) |
||||
|
// Constant buffer for the SPD pass, declared through FFX_FSR2_DECLARE_CB at
// FSR2_BIND_CB_SPD. Member order defines the buffer layout and must not
// change.
cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD)
{
    uint  mips;             // returned by MipCount()
    uint  numWorkGroups;    // returned by NumWorkGroups()
    uint2 workGroupOffset;  // returned by WorkGroupOffset()
    uint2 renderSize;       // returned by SPD_RenderSize()
};
||||
|
|
||||
|
// Accessor for the `mips` member of cbSPD.
uint MipCount()
{
    uint uMips = mips;
    return uMips;
}
||||
|
|
||||
|
// Accessor for the `numWorkGroups` member of cbSPD.
uint NumWorkGroups()
{
    uint uGroupCount = numWorkGroups;
    return uGroupCount;
}
||||
|
|
||||
|
// Accessor for the `workGroupOffset` member of cbSPD.
uint2 WorkGroupOffset()
{
    uint2 uOffset = workGroupOffset;
    return uOffset;
}
||||
|
|
||||
|
// Accessor for the `renderSize` member of cbSPD.
uint2 SPD_RenderSize()
{
    uint2 uSize = renderSize;
    return uSize;
}
||||
|
#else |
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: the cbSPD buffer is
// not declared, so this always reports zero.
uint MipCount()
{
    return 0u;
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always reports zero.
uint NumWorkGroups()
{
    return 0u;
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always returns a
// zero offset.
uint2 WorkGroupOffset()
{
    uint2 uZero = uint2(0, 0);
    return uZero;
}
||||
|
|
||||
|
// Fallback used when FSR2_BIND_CB_SPD is not defined: always returns a
// zero size.
uint2 SPD_RenderSize()
{
    uint2 uZero = uint2(0, 0);
    return uZero;
}
||||
|
#endif |
||||
|
|
||||
|
|
||||
|
// Reads element (0, 0) of rw_exposure. Returns 0 when neither
// FSR2_BIND_UAV_EXPOSURE nor FFX_INTERNAL is defined.
float2 SPD_LoadExposureBuffer()
{
    float2 fExposure = 0;
#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL)
    fExposure = rw_exposure[min16int2(0, 0)];
#endif
    return fExposure;
}
||||
|
|
||||
|
// Writes `value` to element (0, 0) of rw_exposure. Does nothing when
// neither FSR2_BIND_UAV_EXPOSURE nor FFX_INTERNAL is defined.
void SPD_SetExposureBuffer(float2 value)
{
#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL)
    rw_exposure[min16int2(0, 0)] = value;
#endif
}
||||
|
|
||||
|
// Reads rw_img_mip_5 at iPxPos and returns it in the leading component(s)
// of a float4 padded with zeros. Returns 0 when neither
// FSR2_BIND_UAV_EXPOSURE_MIP_5 nor FFX_INTERNAL is defined.
float4 SPD_LoadMipmap5(int2 iPxPos)
{
    float4 fTexel = 0;
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL)
    fTexel = float4(rw_img_mip_5[iPxPos], 0, 0, 0);
#endif
    return fTexel;
}
||||
|
|
||||
|
// Stores `value` at iPxPos in the resource selected by `slice`:
//   FFX_FSR2_SHADING_CHANGE_MIP_LEVEL -> rw_img_mip_shading_change
//   5                                 -> rw_img_mip_5
// For any other `slice` one bound resource is assigned its own current
// value. Each branch exists only when its binding macro (or FFX_INTERNAL)
// is defined.
void SPD_SetMipmap(int2 iPxPos, int slice, float value)
{
    switch (slice)
    {
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL)
        case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
            rw_img_mip_shading_change[iPxPos] = value;
            break;
#endif

#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL)
        case 5:
            rw_img_mip_5[iPxPos] = value;
            break;
#endif

        default:
            // avoid flattened side effect
#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL)
            rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos];
#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL)
            rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos];
#endif
            break;
    }
}
||||
|
|
||||
|
// Atomically adds 1 to element (0, 0) of rw_spd_global_atomic; spdCounter
// is passed as InterlockedAdd's output argument.
// NOTE(review): no FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC guard is applied here,
// so rw_spd_global_atomic must always be declared.
void SPD_IncreaseAtomicCounter(inout uint spdCounter)
{
    InterlockedAdd(
        rw_spd_global_atomic[min16int2(0, 0)],
        1,
        spdCounter);
}
||||
|
|
||||
|
// Writes 0 to element (0, 0) of rw_spd_global_atomic.
void SPD_ResetAtomicCounter()
{
    rw_spd_global_atomic[min16int2(0, 0)] = 0;
}
||||
|
|
||||
|
#include "ffx_fsr2_compute_luminance_pyramid.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 256 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Compute shader entry point: passes the group id (SV_GroupID) and the
// flattened thread index within the group (SV_GroupIndex) to
// ComputeAutoExposure.
FFX_FSR2_NUM_THREADS
FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
void CS(
    uint3 WorkGroupId      : SV_GroupID,
    uint  LocalThreadIndex : SV_GroupIndex)
{
    ComputeAutoExposure(WorkGroupId, LocalThreadIndex);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 19dfb00afb70c3144b43ec2dc05ecdd9 |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,98 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_DEPTH_CLIP_H |
||||
|
#define FFX_FSR2_DEPTH_CLIP_H |
||||
|
|
||||
|
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; |
||||
|
|
||||
|
// Computes the weighted depth-clip contribution of one previous-depth sample.
//
//   iPxSamplePos                 - sample position (only used for the _DEBUG output)
//   fPreviousDepth               - device depth of the previous-frame sample
//   fPreviousDepthBilinearWeight - bilinear weight of this sample
//   fCurrentDepthViewSpace       - current view-space depth to compare against
//
// Returns weight * clipFactor * lerp(1, DepthClipBaseScale, saturate(diff^2)),
// where diff is current minus previous view-space depth and clipFactor is 1
// unless diff is positive.
FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace)
{
    FfxFloat32 fPrevViewDepth = abs(ConvertFromDeviceDepthToViewSpace(fPreviousDepth));

    // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion"
    // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4
    // TODO: check intention and improve, some banding visible
    const FfxFloat32 fHalfWidth = RenderSize().x * 0.5f;
    FfxFloat32 fNearerDepth = ffxMin(fCurrentDepthViewSpace, fPrevViewDepth);

    // WARNING: Ksep only works with reversed-z with infinite projection.
    const FfxFloat32 Ksep = 1.37e-05f;
    FfxFloat32 fMinSeparation = Ksep * fNearerDepth * TanHalfFoV() * fHalfWidth;
    FfxFloat32 fDelta = fCurrentDepthViewSpace - fPrevViewDepth;

    // Only a positive depth difference can reduce the factor below 1.
    FfxFloat32 fClipFactor = 1.0f;
    if (fDelta > 0)
    {
        fClipFactor = ffxSaturate(fMinSeparation / fDelta);
    }

#ifdef _DEBUG
    rw_debug_out[iPxSamplePos] = FfxFloat32x4(fCurrentDepthViewSpace, fPrevViewDepth, fDelta, fClipFactor);
#endif

    FfxFloat32 fScale = ffxLerp(1.0f, DepthClipBaseScale, ffxSaturate(fDelta * fDelta));
    return fPreviousDepthBilinearWeight * fClipFactor * fScale;
}
||||
|
|
||||
|
// Evaluates the depth clip value at UV position fUvSample.
//
// The 2x2 texel footprint around the sample is visited. Each tap that is on
// screen and whose bilinear weight exceeds
// reconstructedDepthBilinearWeightThreshold contributes
// ComputeSampleDepthClip(...) and its weight. The result is the weighted
// sum divided by the accumulated weight, or DepthClipBaseScale when no tap
// contributed.
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthViewSpace)
{
    FfxFloat32x2 fTexel = fUvSample * RenderSize() - 0.5f;
    FfxInt32x2 iBase = FfxInt32x2(floor(fTexel));
    FfxFloat32x2 fFrac = ffxFract(fTexel);

    // Per-axis weights for the near (0) and far (1) tap.
    FfxFloat32 fWx0 = (1 - fFrac.x);
    FfxFloat32 fWx1 = (fFrac.x);
    FfxFloat32 fWy0 = (1 - fFrac.y);
    FfxFloat32 fWy1 = (fFrac.y);

    // Indexed as [row][column].
    const FfxFloat32 fBilinearWeights[2][2] = {
        {
            fWx0 * fWy0,
            fWx1 * fWy0
        },
        {
            fWx0 * fWy1,
            fWx1 * fWy1
        }
    };

    FfxFloat32 fAccum = 0.0f;
    FfxFloat32 fAccumWeight = 0.0f;
    for (FfxInt32 iRow = 0; iRow < 2; ++iRow) {
        for (FfxInt32 iCol = 0; iCol < 2; ++iCol) {
            FfxInt32x2 iTap = iBase + FfxInt32x2(iCol, iRow);
            if (!IsOnScreen(iTap, RenderSize())) {
                continue;
            }

            FfxFloat32 fTapWeight = fBilinearWeights[iRow][iCol];
            if (!(fTapWeight > reconstructedDepthBilinearWeightThreshold)) {
                continue;
            }

            fAccum += ComputeSampleDepthClip(iTap, LoadReconstructedPrevDepth(iTap), fTapWeight, fCurrentDepthViewSpace);
            fAccumWeight += fTapWeight;
        }
    }

    if (fAccumWeight > 0) {
        return fAccum / fAccumWeight;
    }
    return DepthClipBaseScale;
}
||||
|
|
||||
|
// Per-pixel body of the depth clip pass: offsets the pixel-centre UV by the
// dilated motion vector, evaluates ComputeDepthClip there against the
// pixel's current view-space depth, and stores the result at iPxPos.
void DepthClip(FfxInt32x2 iPxPos)
{
    // Pixel-centre UV shifted by the motion vector loaded for this pixel.
    FfxFloat32x2 fPixelUv = (iPxPos + 0.5f) / RenderSize();
    FfxFloat32x2 fShiftedUv = fPixelUv + LoadDilatedMotionVector(iPxPos);

    FfxFloat32 fViewDepth = abs(ConvertFromDeviceDepthToViewSpace(LoadDilatedDepth(iPxPos)));

    StoreDepthClip(iPxPos, ComputeDepthClip(fShiftedUv, fViewDepth));
}
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_DEPTH_CLIP_H )
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: f7c16477aeb3a9b4f94f4ef818d10d9b |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,62 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 3 |
||||
|
// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth |
||||
|
// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors |
||||
|
// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth |
||||
|
// UAV 12 : FSR2_DepthClip : rw_depth_clip |
||||
|
// CB 0 : cbFSR2 |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 |
||||
|
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 |
||||
|
#define FSR2_BIND_SRV_DILATED_DEPTH 2 |
||||
|
#define FSR2_BIND_UAV_DEPTH_CLIP 3 |
||||
|
#define FSR2_BIND_CB_FSR2 4 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_depth_clip.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
// Shader entry point: runs DepthClip for the pixel addressed by the global
// invocation id.
void main()
{
    ivec2 iPxPos = ivec2(gl_GlobalInvocationID.xy);
    DepthClip(iPxPos);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: b02126743783b1942a0777c5e9b7526a |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,63 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 3 |
||||
|
// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth |
||||
|
// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors |
||||
|
// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth |
||||
|
// UAV 12 : FSR2_DepthClip : rw_depth_clip |
||||
|
// CB 0 : cbFSR2 |
||||
|
|
||||
|
#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 |
||||
|
#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 |
||||
|
#define FSR2_BIND_SRV_DILATED_DEPTH 2 |
||||
|
#define FSR2_BIND_UAV_DEPTH_CLIP 0 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_depth_clip.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
// Compute shader entry point: runs DepthClip for the pixel addressed by the
// dispatch thread id. The other system-value inputs are declared but not
// used here.
FFX_FSR2_PREFER_WAVE64
FFX_FSR2_NUM_THREADS
FFX_FSR2_EMBED_ROOTSIG_CONTENT
void CS(int2 iGroupId          : SV_GroupID,
        int2 iDispatchThreadId : SV_DispatchThreadID,
        int2 iGroupThreadId    : SV_GroupThreadID,
        int  iGroupIndex       : SV_GroupIndex)
{
    DepthClip(iDispatchThreadId);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 7981b48622ddaa944909ebf209284d83 |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,126 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_LOCK_H |
||||
|
#define FFX_FSR2_LOCK_H |
||||
|
|
||||
|
// Returns LoadPreparedInputColorLuma(pos) converted to FfxFloat32.
// NOTE(review): upstream's comment spoke of adding bias to avoid locking
// dark areas, but this function applies none. Any bias would have to live
// in LoadPreparedInputColorLuma, which is not visible here.
FfxFloat32 GetLuma(FfxInt32x2 pos)
{
    FfxFloat32 fLuma = FfxFloat32(LoadPreparedInputColorLuma(pos));
    return fLuma;
}
||||
|
|
||||
|
// Classifies the pixel at `pos` as a thin feature (returns 1) or not
// (returns 0) from the lumas of its 3x3 neighbourhood.
//
// A neighbour is "similar" when max(luma, centre) / min(luma, centre) lies
// in (0, similar_threshold). The centre is a ridge when its luma is
// strictly above the maximum or strictly below the minimum of the
// dissimilar neighbours. The function returns 1 only for a ridge where
// none of the four 2x2 quadrants containing the centre is entirely similar.
FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos)
{
    const FfxInt32 RADIUS = 1;

    FfxFloat32 fNucleus = GetLuma(pos);

    FfxFloat32 similar_threshold = 1.05f;
    FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
    FfxFloat32 dissimilarLumaMax = 0;

    /*
     Bit index of each neighbourhood texel (row-major):
     0 1 2
     3 4 5
     6 7 8
    */

    // The argument is parenthesised so that expression arguments expand
    // correctly; the macro stays defined after this function, as before.
    #define SETBIT(x) (1U << (x))

    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar

    // One mask per 2x2 quadrant that includes the centre texel (bit 4).
    const FfxUInt32 rejectionMasks[4] = {
        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
    };

    // idx advances on every texel, including the skipped centre, so it
    // always matches the bit layout above.
    FfxInt32 idx = 0;
    FFX_UNROLL
    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
            if (x == 0 && y == 0) continue;

            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            FfxFloat32 sampleLuma = GetLuma(samplePos);
            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);

            if (difference > 0 && (difference < similar_threshold)) {
                mask |= SETBIT(idx);
            } else {
                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
            }
        }
    }

    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;

    if (FFX_FALSE == isRidge) {
        return 0;
    }

    // Reject when any quadrant around the centre is entirely similar.
    FFX_UNROLL
    for (FfxInt32 i = 0; i < 4; i++) {
        if ((mask & rejectionMasks[i]) == rejectionMasks[i]) {
            return 0;
        }
    }

    return 1;
}
||||
|
|
||||
|
FFX_STATIC FfxBoolean s_bLockUpdated = FFX_FALSE; |
||||
|
|
||||
|
// Updates the lock status for low-resolution pixel iPxLrPos.
//
// When ComputeThinFeatureConfidence reports a value above 0, the
// LOCK_LIFETIME_REMAINING component is set to -LockInitialLifetime() if it
// was exactly 0, otherwise to -(LockInitialLifetime() * 2), and
// s_bLockUpdated is set to FFX_TRUE. In every other case fLockStatus is
// returned unchanged and s_bLockUpdated is FFX_FALSE.
FfxFloat32x3 ComputeLockStatus(FfxInt32x2 iPxLrPos, FfxFloat32x3 fLockStatus)
{
    FfxFloat32 fThinFeature = ComputeThinFeatureConfidence(iPxLrPos);

    s_bLockUpdated = FFX_FALSE;
    if (fThinFeature > 0.0f)
    {
        //put to negative on new lock
        if (fLockStatus[LOCK_LIFETIME_REMAINING] == FfxFloat32(0.0f))
        {
            fLockStatus[LOCK_LIFETIME_REMAINING] = FfxFloat32(-LockInitialLifetime());
        }
        else
        {
            fLockStatus[LOCK_LIFETIME_REMAINING] = FfxFloat32(-(LockInitialLifetime() * 2));
        }

        s_bLockUpdated = FFX_TRUE;
    }

    return fLockStatus;
}
||||
|
|
||||
|
// Per-pixel body of the lock pass: maps the low-resolution position to its
// high-resolution position, recomputes the lock status loaded from there,
// and stores it back only when ComputeLockStatus flagged an update.
void ComputeLock(FfxInt32x2 iPxLrPos)
{
    FfxInt32x2 iPxHrPos = ComputeHrPosFromLrPos(iPxLrPos);

    FfxFloat32x3 fStoredStatus = LoadLockStatus(iPxHrPos);
    FfxFloat32x3 fNewStatus = ComputeLockStatus(iPxLrPos, fStoredStatus);

    // s_bLockUpdated is set by the ComputeLockStatus call above.
    if (!s_bLockUpdated) {
        return;
    }

    StoreLockStatus(iPxHrPos, fNewStatus);
}
||||
|
|
||||
|
#endif // FFX_FSR2_LOCK_H |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 9b4cdc5f81194ac4fa946c31b86234ed |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,65 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 4 |
||||
|
// SRV 5 : m_UpscaleReactive : r_reactive_mask |
||||
|
// SRV 11 : FSR2_LockStatus2 : r_lock_status |
||||
|
// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color |
||||
|
// UAV 11 : FSR2_LockStatus1 : rw_lock_status |
||||
|
// UAV 27 : FSR2_ReactiveMaskMax : rw_reactive_max |
||||
|
// CB 0 : cbFSR2 |
||||
|
// CB 1 : FSR2DispatchOffsets |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_LOCK_STATUS 0 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 1 |
||||
|
#define FSR2_BIND_UAV_LOCK_STATUS 2 |
||||
|
#define FSR2_BIND_CB_FSR2 3 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_lock.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
// Lock pass entry point: derives this invocation's 2D position from its
// work-group ID and local ID, then hands it to ComputeLock().
void main()
{
    uvec2 uGroupSize = uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    uvec2 uGroupOrigin = gl_WorkGroupID.xy * uGroupSize;
    uvec2 uDispatchThreadId = uGroupOrigin + gl_LocalInvocationID.xy;

    ComputeLock(ivec2(uDispatchThreadId));
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 5f63384878e8f1c428f5716f3b8e1065 |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,60 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 4 |
||||
|
// SRV 5 : m_UpscaleReactive : r_reactive_mask |
||||
|
// SRV 11 : FSR2_LockStatus2 : r_lock_status |
||||
|
// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color |
||||
|
// UAV 11 : FSR2_LockStatus1 : rw_lock_status |
||||
|
// CB 0 : cbFSR2 |
||||
|
|
||||
|
#define FSR2_BIND_SRV_LOCK_STATUS 1 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 2 |
||||
|
#define FSR2_BIND_UAV_LOCK_STATUS 0 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_lock.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_PREFER_WAVE64 |
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
FFX_FSR2_EMBED_ROOTSIG_CONTENT |
||||
|
// Lock pass entry point: derives this thread's 2D position from its group ID
// and group-thread ID, then hands it to ComputeLock().
void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
{
    const uint2 uGroupSize = uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT);
    const uint2 uGroupOrigin = uGroupId * uGroupSize;
    uint2 uDispatchThreadId = uGroupOrigin + uGroupThreadId;

    ComputeLock(uDispatchThreadId);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 471a3f7a033c72f4fa737d4f8238a9bd |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,98 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H |
||||
|
#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H |
||||
|
|
||||
|
// Fetch callback for the custom bilinear sampler declared below: loads the luma
// at iPxSample from mip LumaMipLevelToUse() and returns it in the first
// component of a four-component vector, with the other components zero.
FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
{
    const FfxFloat32x4 fWrappedLuma = FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
    return fWrappedLuma;
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Reduced-precision overload (compiled only when FFX_HALF is set): loads the
// luma at iPxSample from mip LumaMipLevelToUse() and returns it in the first
// component of a four-component vector, with the other components zero.
FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
{
    const FFX_MIN16_F4 fWrappedLuma = FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
    return fWrappedLuma;
}
||||
|
#endif |
||||
|
|
||||
|
#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF |
||||
|
DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) |
||||
|
#else |
||||
|
DeclareCustomFetchBilinearSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) |
||||
|
#endif |
||||
|
DeclareCustomTextureSample(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) |
||||
|
|
||||
|
// Returns the shading-change luma at fUvCoord: the luma mip LumaMipLevelToUse()
// is sampled through SampleMipLuma(), scaled by LumaMipRcp() and exponentiated.
//
// NOTE(review): the custom bilinear sampler ShadingChangeLumaSample declared
// above is not used by this function; an earlier variant based on it was left
// commented out in the original source.
FfxFloat32 GetShadingChangeLuma(FfxFloat32x2 fUvCoord)
{
    return FfxFloat32(exp(SampleMipLuma(fUvCoord, LumaMipLevelToUse()) * FfxFloat32(LumaMipRcp())));
}
||||
|
|
||||
|
// Decodes a lock status vector into a LockState.
//
// NewLock            - the remaining lifetime is negative, which is how a new or
//                      refreshed lock is marked.
// WasLockedPrevFrame - the LOCK_TRUST component is non-zero.
LockState GetLockState(FfxFloat32x3 fLockStatus)
{
    const FfxFloat32 fLifetimeRemaining = fLockStatus[LOCK_LIFETIME_REMAINING];
    const FfxFloat32 fTrust = fLockStatus[LOCK_TRUST];

    LockState state = { FFX_FALSE, FFX_FALSE };

    state.NewLock = fLifetimeRemaining < FfxFloat32(0.0f);
    state.WasLockedPrevFrame = fTrust != FfxFloat32(0.0f);

    return state;
}
||||
|
|
||||
|
// Post-processes the lock status of a pixel.
//
// fLrUvJittered     - UV used to sample the shading-change luma.
// fDepthClipFactor  - an existing lock is killed when this is below 0.99.
// fAccumulationMask - scales down the remaining lifetime of an existing lock.
// fLockStatus       - in/out: lifetime made positive, temporal luma blended,
//                     lock possibly killed.
// fLuminanceDiff    - out: 1 - min/max of the previous temporal luma and the
//                     current shading-change luma.
// iPxHrPos, fHrVelocity and fAccumulationTotalWeight are not used by this body.
//
// Returns the LockState decoded from fLockStatus before any modification.
LockState PostProcessLockStatus(FfxInt32x2 iPxHrPos, FFX_PARAMETER_IN FfxFloat32x2 fLrUvJittered, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, const FfxFloat32 fAccumulationMask, FFX_PARAMETER_IN FfxFloat32 fHrVelocity,
    FFX_PARAMETER_INOUT FfxFloat32 fAccumulationTotalWeight, FFX_PARAMETER_INOUT FfxFloat32x3 fLockStatus, FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff)
{
    // Decode first: the sign of the lifetime is consumed here and dropped below.
    const LockState state = GetLockState(fLockStatus);

    fLockStatus[LOCK_LIFETIME_REMAINING] = abs(fLockStatus[LOCK_LIFETIME_REMAINING]);

    FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(fLrUvJittered);

    // A temporal luma of exactly zero is treated as "not initialised yet" and is
    // seeded with the current value.
    if (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f))
    {
        fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
    }

    // Blend the history halfway towards the current luma, keeping the
    // pre-blend value for the difference test.
    const FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
    fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fPreviousShadingChangeLuma, FfxFloat32(fShadingChangeLuma), FfxFloat32(0.5f));
    fLuminanceDiff = FfxFloat32(1) - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);

    // Shading changed too much: drop the lock.
    if (fLuminanceDiff > FfxFloat32(0.2f))
    {
        KillLock(fLockStatus);
    }

    // Existing (not new/refreshed) locks decay with the accumulation mask and
    // are dropped when the depth clip factor falls below the threshold.
    if (!state.NewLock && fLockStatus[LOCK_LIFETIME_REMAINING] >= FfxFloat32(0))
    {
        fLockStatus[LOCK_LIFETIME_REMAINING] = fLockStatus[LOCK_LIFETIME_REMAINING] * (1.0f - fAccumulationMask);

        const FfxFloat32 depthClipThreshold = FfxFloat32(0.99f);
        if (fDepthClipFactor < depthClipThreshold)
        {
            KillLock(fLockStatus);
        }
    }

    return state;
}
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H ) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 3945c3cfd2cc1a64cb0513864d88d8ca |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,88 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_PREPARE_INPUT_COLOR_H |
||||
|
#define FFX_FSR2_PREPARE_INPUT_COLOR_H |
||||
|
|
||||
|
//TODO: Move to common location & share with Accumulate |
||||
|
// Resets the reconstructed depth at the given position to the far-plane value,
// provided the position lies inside RenderSize().
//
// NOTE(review): the parameter is named iPxHrPos but is bounds-checked against
// RenderSize() and is passed a low-resolution position by PrepareInputColor().
void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
{
    // Far-plane depth as raw bits: 0x3f800000 is the IEEE-754 encoding of 1.0f,
    // and zero is used instead when depth is inverted.
#if FFX_FSR2_OPTION_INVERTED_DEPTH
    const FfxUInt32 farZ = 0x0;
#else
    const FfxUInt32 farZ = 0x3f800000;
#endif

    // Positions outside the render area are left alone.
    if (!all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize()))))
    {
        return;
    }

    SetReconstructedDepth(iPxHrPos, farZ);
}
||||
|
|
||||
|
// Updates the per-pixel luma history.
//
// Components 0..2 of the history hold the three most recent lumas (oldest
// first); the fourth component receives the stability factor. The factor stays
// zero until FrameIndex() exceeds 3. After that it is the saturated amount by
// which the current luma matches the two older entries better than the most
// recent one.
void ComputeLumaStabilityFactor(FfxInt32x2 iPxLrPos, FfxFloat32 fCurrentFrameLuma)
{
    const FfxFloat32x4 fStoredHistory = LoadRwLumaHistory(iPxLrPos);

    FfxFloat32 fStabilityFactor = FfxFloat32(0);

    if (FrameIndex() > 3)
    {
        const FfxFloat32 fMatchOldest = MinDividedByMax(fStoredHistory[0], fCurrentFrameLuma);
        const FfxFloat32 fMatchMiddle = MinDividedByMax(fStoredHistory[1], fCurrentFrameLuma);
        const FfxFloat32 fMatchNewest = MinDividedByMax(fStoredHistory[2], fCurrentFrameLuma);

        fStabilityFactor = ffxSaturate(ffxMax(fMatchOldest, fMatchMiddle) - fMatchNewest);
    }

    // Shift the history by one frame and append the current luma.
    const FfxFloat32x4 fUpdatedHistory = FfxFloat32x4(fStoredHistory[1], fStoredHistory[2], fCurrentFrameLuma, fStabilityFactor);

    StoreLumaHistory(iPxLrPos, fUpdatedHistory);
}
||||
|
|
||||
|
// Prepares the input colour of one low-resolution pixel:
//   1. loads the colour, clamps negatives to zero and applies Exposure();
//   2. tonemaps it when FFX_FSR2_OPTION_HDR_COLOR_INPUT is set;
//   3. updates the luma history / stability factor with the perceived luma;
//   4. stores YCoCg in xyz and the lock luma (perceived luma ^ 1/6) in w;
//   5. resets the reconstructed depth for the next frame.
//
// The input is assumed to be linear. Non-linear input (sRGB, ...) should be
// converted to linear first and back on output.
void PrepareInputColor(FfxInt32x2 iPxLrPos)
{
    FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
    fRgb = fRgb * Exposure();

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    // Tonemapped colour is what the lock status and luma stability computations use.
    fRgb = Tonemap(fRgb);
#endif

    const FfxFloat32x3 fColorYCoCg = RGBToYCoCg(fRgb);

    const FfxFloat32 fPerceivedLuma = RGBToPerceivedLuma(fRgb);
    ComputeLumaStabilityFactor(iPxLrPos, fPerceivedLuma);

    // Luma used to lock pixels. If it is needed elsewhere the ffxPow must be moved.
    const FfxFloat32 fLockLuma = ffxPow(fPerceivedLuma, FfxFloat32(1.0 / 6.0));

    const FfxFloat32x4 fYCoCg = FfxFloat32x4(fColorYCoCg, fLockLuma);

    StorePreparedInputColor(iPxLrPos, fYCoCg);
    ClearResourcesForNextFrame(iPxLrPos);
}
||||
|
|
||||
|
#endif // FFX_FSR2_PREPARE_INPUT_COLOR_H |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 778b4088d62d52f49acf76d0c4d97922 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,62 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
// FSR2 pass 1 |
||||
|
// SRV 1 : m_HDR : r_input_color_jittered |
||||
|
// SRV 4 : FSR2_Exposure : r_exposure |
||||
|
// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth |
||||
|
// UAV 13 : FSR2_PreparedInputColor : rw_prepared_input_color |
||||
|
// UAV 14 : FSR2_LumaHistory : rw_luma_history |
||||
|
// CB 0 : cbFSR2 |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_INPUT_COLOR 0 |
||||
|
#define FSR2_BIND_SRV_EXPOSURE 1 |
||||
|
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 |
||||
|
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 3 |
||||
|
#define FSR2_BIND_UAV_LUMA_HISTORY 4 |
||||
|
#define FSR2_BIND_CB_FSR2 5 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_prepare_input_color.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
// Prepare-input-color pass entry point: forwards this invocation's global 2D
// position to PrepareInputColor().
void main()
{
    ivec2 iPxLrPos = ivec2(gl_GlobalInvocationID.xy);

    PrepareInputColor(iPxLrPos);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 130af27aabe0f3347bf8fcfea8d5de84 |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,64 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 1 |
||||
|
// SRV 1 : m_HDR : r_input_color_jittered |
||||
|
// SRV 4 : FSR2_Exposure : r_exposure |
||||
|
// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth |
||||
|
// UAV 13 : FSR2_PreparedInputColor : rw_prepared_input_color |
||||
|
// UAV 14 : FSR2_LumaHistory : rw_luma_history |
||||
|
// CB 0 : cbFSR2 |
||||
|
|
||||
|
#define FSR2_BIND_SRV_INPUT_COLOR 0 |
||||
|
#define FSR2_BIND_SRV_EXPOSURE 1 |
||||
|
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 |
||||
|
#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 1 |
||||
|
#define FSR2_BIND_UAV_LUMA_HISTORY 2 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_prepare_input_color.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
FFX_FSR2_EMBED_ROOTSIG_CONTENT |
||||
|
// Prepare-input-color pass entry point: forwards the dispatch thread ID to
// PrepareInputColor(). The remaining system values are declared but unused.
void CS(
    uint2 uGroupId : SV_GroupID,
    uint2 uDispatchThreadId : SV_DispatchThreadID,
    uint2 uGroupThreadId : SV_GroupThreadID,
    uint uGroupIndex : SV_GroupIndex
)
{
    const FfxInt32x2 iPxLrPos = FfxInt32x2(uDispatchThreadId);

    PrepareInputColor(iPxLrPos);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 0710c8cc5e741b34882a94dddc5a1a6b |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 755780b5a13da4047a06cae98bbcc0de |
|
||||
|
guid: 7bd7d4eb34c626342966cb9b3fe00363 |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -0,0 +1,92 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
// FSR2 pass 6 |
||||
|
// SRV 4 : m_Exposure : r_exposure |
||||
|
// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input |
||||
|
// UAV 18 : DisplayOutput : rw_upscaled_output |
||||
|
// CB 0 : cbFSR2 |
||||
|
// CB 1 : cbRCAS |
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_EXPOSURE 0 |
||||
|
#define FSR2_BIND_SRV_RCAS_INPUT 1 |
||||
|
#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 |
||||
|
#define FSR2_BIND_CB_FSR2 3 |
||||
|
#define FSR2_BIND_CB_RCAS 4 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
|
||||
|
//Move to prototype shader! |
||||
|
#if defined(FSR2_BIND_CB_RCAS) |
||||
|
// Constant buffer holding the RCAS configuration, bound at FSR2_BIND_CB_RCAS.
layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
{
    uvec4 rcasConfig;
} cbRCAS;

// Returns the RCAS configuration read from the cbRCAS constant buffer.
uvec4 RCASConfig()
{
    uvec4 uConfig = cbRCAS.rcasConfig;
    return uConfig;
}
||||
|
#else |
||||
|
// Fallback used when no RCAS constant buffer is bound: returns an all-zero
// configuration.
uvec4 RCASConfig()
{
    uvec4 uConfig = uvec4(0);
    return uConfig;
}
||||
|
#endif |
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Half-precision build (FFX_HALF): fetches the texel at iPxPos from mip 0 of
// r_rcas_input.
vec4 LoadRCAS_Input(FfxInt16x2 iPxPos)
{
    vec4 fTexel = texelFetch(r_rcas_input, iPxPos, 0);
    return fTexel;
}
||||
|
#else |
||||
|
// Full-precision build: fetches the texel at iPxPos from mip 0 of r_rcas_input.
vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
{
    vec4 fTexel = texelFetch(r_rcas_input, iPxPos, 0);
    return fTexel;
}
||||
|
#endif |
||||
|
|
||||
|
#include "ffx_fsr2_rcas.h" |
||||
|
|
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_WIDTH 64 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT |
||||
|
#ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#define FFX_FSR2_THREAD_GROUP_DEPTH 1 |
||||
|
#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH |
||||
|
#ifndef FFX_FSR2_NUM_THREADS |
||||
|
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; |
||||
|
#endif // #ifndef FFX_FSR2_NUM_THREADS |
||||
|
|
||||
|
FFX_FSR2_NUM_THREADS |
||||
|
// RCAS pass entry point: forwards the local, work-group and global invocation
// IDs to RCAS().
void main()
{
    uvec3 uLocalThreadId = gl_LocalInvocationID.xyz;
    uvec3 uWorkGroupId = gl_WorkGroupID.xyz;
    uvec3 uGlobalThreadId = gl_GlobalInvocationID.xyz;

    RCAS(uLocalThreadId, uWorkGroupId, uGlobalThreadId);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 6e70536fee1272645bab8093b1c715e7 |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: 77fd29b4fc373da40881622c212f2505 |
|
||||
|
guid: 871ca1938c701d64f94ef8ec00ef06f4 |
||||
ShaderIncludeImporter: |
ShaderIncludeImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
userData: |
userData: |
||||
@ -0,0 +1,202 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H |
||||
|
#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H |
||||
|
|
||||
|
// Projects the current depth of a pixel into the previous frame.
//
// iPxPos        - pixel position in the current frame.
// fDepth        - depth value to scatter.
// fMotionVector - motion vector added to the pixel's UV.
// iPxDepthSize  - size used for UV conversion and the on-screen test.
//
// The reprojected position generally falls between four pixels. The depth is
// stored to each of those pixels whose bilinear weight exceeds
// reconstructedDepthBilinearWeightThreshold and which is on screen.
void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
{
    // Pixel centre -> UV -> previous-frame position in pixel units.
    const FfxFloat32x2 fDepthUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
    const FfxFloat32x2 fPxPrevPos = (fDepthUv + fMotionVector) * iPxDepthSize - FfxFloat32x2(0.5, 0.5);

    // Top-left pixel of the 2x2 footprint and the fractional position inside it.
    const FfxInt32x2 iPxPrevPos = FfxInt32x2(floor(fPxPrevPos));
    const FfxFloat32x2 fPxFrac = ffxFract(fPxPrevPos);

    for (FfxInt32 iRow = 0; iRow <= 1; ++iRow)
    {
        const FfxFloat32 fWeightY = (iRow == 0) ? (1 - fPxFrac.y) : (fPxFrac.y);

        for (FfxInt32 iCol = 0; iCol <= 1; ++iCol)
        {
            const FfxFloat32 fWeightX = (iCol == 0) ? (1 - fPxFrac.x) : (fPxFrac.x);
            const FfxFloat32 fWeight = fWeightX * fWeightY;

            // Written as !(a > b) so that a NaN weight is rejected, as in a plain "if (a > b)".
            if (!(fWeight > reconstructedDepthBilinearWeightThreshold))
            {
                continue;
            }

            const FfxInt32x2 iStorePos = iPxPrevPos + FfxInt32x2(iCol, iRow);
            if (IsOnScreen(iStorePos, iPxDepthSize))
            {
                StoreReconstructedDepth(iStorePos, fDepth);
            }
        }
    }
}
||||
|
|
||||
|
// Searches the 3x3 neighbourhood of iPxPos for the nearest input depth.
//
// "Nearest" means the largest value when FFX_FSR2_OPTION_INVERTED_DEPTH is
// set and the smallest value otherwise. The centre pixel is always the
// initial candidate; neighbours are only considered when IsOnScreen() accepts
// them.
//
//   iPxPos             - centre pixel of the search
//   iPxSize            - size in pixels used for the on-screen test
//   fNearestDepth      - [out] nearest depth found
//   fNearestDepthCoord - [out] pixel coordinate at which it was found
void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
{
    const FfxInt32 iSampleCount = 9;
    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
        FfxInt32x2(+0, +0),
        FfxInt32x2(+1, +0),
        FfxInt32x2(+0, +1),
        FfxInt32x2(+0, -1),
        FfxInt32x2(-1, +0),
        FfxInt32x2(-1, +1),
        FfxInt32x2(+1, +1),
        FfxInt32x2(-1, -1),
        FfxInt32x2(+1, -1),
    };

    // pull out the depth loads to allow SC to batch them
    FfxFloat32 fDepthSamples[iSampleCount];
    FfxInt32 iLoad = 0;
    FFX_UNROLL
    for (iLoad = 0; iLoad < iSampleCount; ++iLoad) {
        fDepthSamples[iLoad] = LoadInputDepth(iPxPos + iSampleOffsets[iLoad]);
    }

    // Start from the centre sample (offset 0), then test the eight neighbours.
    fNearestDepthCoord = iPxPos;
    fNearestDepth      = fDepthSamples[0];

    FfxInt32 iCandidate = 1;
    FFX_UNROLL
    for (iCandidate = 1; iCandidate < iSampleCount; ++iCandidate) {

        const FfxInt32x2 iCandidatePos = iPxPos + iSampleOffsets[iCandidate];
        if (!IsOnScreen(iCandidatePos, iPxSize)) {
            continue;
        }

        const FfxFloat32 fCandidateDepth = fDepthSamples[iCandidate];
#if FFX_FSR2_OPTION_INVERTED_DEPTH
        const FfxBoolean bIsNearer = fCandidateDepth > fNearestDepth;
#else
        const FfxBoolean bIsNearer = fCandidateDepth < fNearestDepth;
#endif
        if (bIsNearer) {
            fNearestDepthCoord = iCandidatePos;
            fNearestDepth      = fCandidateDepth;
        }
    }
}
||||
|
|
||||
|
// Measures how much the motion vectors in the 3x3 neighbourhood of iPxPos
// disagree with the motion vector at iPxPos itself.
//
// Every motion vector is scaled by RenderSize() before use. For each
// neighbour (clamped via ClampLoad) both vectors are divided by the larger of
// the two lengths and their dot product is taken; the minimum over the
// neighbourhood is tracked, starting at 1. The result is
// ffxSaturate(1 - minimum). When the centre vector's scaled length does not
// exceed 1e-2 the neighbourhood is skipped and the result is ffxSaturate(0).
//
//   iPxPos                   - pixel whose neighbourhood is examined
//   iPxInputMotionVectorSize - size passed to ClampLoad for the neighbour lookups
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
    const FfxFloat32 fVelocityEpsilon = 1e-02f;

    const FfxFloat32x2 fCenterMotion = LoadInputMotionVector(iPxPos) * RenderSize();
    const FfxFloat32   fCenterSpeed  = length(fCenterMotion);

    FfxFloat32 fMinAlignment = 1.0f;

    if (!(fCenterSpeed > fVelocityEpsilon)) {
        return ffxSaturate(1.0f - fMinAlignment);
    }

    for (FfxInt32 iRow = -1; iRow <= 1; ++iRow) {
        for (FfxInt32 iCol = -1; iCol <= 1; ++iCol) {

            const FfxInt32x2 iNeighbourPos = ClampLoad(iPxPos, FfxInt32x2(iCol, iRow), iPxInputMotionVectorSize);

            const FfxFloat32x2 fNeighbourMotion = LoadInputMotionVector(iNeighbourPos) * RenderSize();

            // Normalise both vectors by the longer one, so a shorter
            // neighbour yields a smaller dot product. fCenterSpeed is above
            // the epsilon here, so the divisor is never zero.
            const FfxFloat32 fNormaliser = ffxMax(length(fNeighbourMotion), fCenterSpeed);

            fMinAlignment = ffxMin(fMinAlignment, dot(fNeighbourMotion / fNormaliser, fCenterMotion / fNormaliser));
        }
    }

    return ffxSaturate(1.0f - fMinAlignment);
}
||||
|
|
||||
|
|
||||
|
// Dilates the reactive mask and the transparency-and-composition mask over a
// 3x3 neighbourhood and stores both as one two-component value.
//
// Each neighbour's mask values are raised to a power between 1 and 7 that
// grows as the neighbour's prepared colour becomes less similar to the centre
// pixel's; the per-channel maximum over the neighbourhood is stored. The
// second channel is additionally floored by fMotionDivergence.
//
//   iPxLrPos          - pixel being processed
//   fMotionDivergence - initial value of the second output channel
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
    // Compensate for bilinear sampling in accumulation pass

    // Increase power for non-similar samples
    const FfxFloat32 fPowerBiasMax = 6.0f;

    const FfxInt32x2   iRenderSize      = FfxInt32x2(RenderSize());
    const FfxFloat32x3 fCenterColor     = LoadPreparedInputColor(iPxLrPos);
    // NOTE(review): normalize() of an all-zero colour may produce NaN on
    // some targets -- TODO confirm how NaN propagates through ffxPow/ffxMax.
    const FfxFloat32x3 fCenterDirection = normalize(fCenterColor);
    const FfxFloat32   fCenterLength    = length(fCenterColor);

    // x: reactive, y: transparency-and-composition (seeded with motion divergence).
    FfxFloat32x2 fDilatedMasks = FfxFloat32x2(0.0f, fMotionDivergence);

    for (FfxInt32 iRow = -1; iRow < 2; iRow++) {
        for (FfxInt32 iCol = -1; iCol < 2; iCol++) {

            const FfxInt32x2 iNeighbourPos = ClampLoad(iPxLrPos, FfxInt32x2(iCol, iRow), iRenderSize);

            const FfxFloat32x3 fNeighbourColor = LoadPreparedInputColor(iNeighbourPos);
            const FfxFloat32   fReactive       = LoadReactiveMask(iNeighbourPos);
            const FfxFloat32   fComposition    = LoadTransparencyAndCompositionMask(iNeighbourPos);

            // Similarity = direction agreement * magnitude agreement.
            const FfxFloat32 fDirectionSimilarity = dot(fCenterDirection, normalize(fNeighbourColor));
            const FfxFloat32 fLengthSimilarity    = 1.0f - abs(fCenterLength - length(fNeighbourColor));
            const FfxFloat32 fSimilarity          = fDirectionSimilarity * fLengthSimilarity;

            const FfxFloat32 fExponent = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);

            fDilatedMasks = ffxMax(fDilatedMasks, FfxFloat32x2(ffxPow(fReactive, fExponent), ffxPow(fComposition, fExponent)));
        }
    }

    StoreDilatedReactiveMasks(iPxLrPos, fDilatedMasks);
}
||||
|
|
||||
|
// Per-pixel body of the "reconstruct and dilate" pass.
//
// For pixel iPxLrPos this:
//   1. finds the nearest depth in the 3x3 neighbourhood (FindNearestDepth),
//   2. loads the input motion vector at that nearest-depth position,
//   3. stores both as the dilated depth / dilated motion vector,
//   4. scatters the depth into the reconstructed previous-frame depth,
//   5. computes the motion divergence and pre-processes the reactive masks.
//
// With FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS the motion-vector
// lookups use the positions as-is together with RenderSize(); otherwise they
// are mapped through ComputeHrPosFromLrPos() and use DisplaySize().
void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
{
    FfxFloat32 fNearestDepth;
    FfxInt32x2 iNearestDepthPos;
    FindNearestDepth(iPxLrPos, RenderSize(), fNearestDepth, iNearestDepthPos);

#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    const FfxInt32x2 iDivergencePos    = iPxLrPos;
    const FfxInt32x2 iMotionVectorPos  = iNearestDepthPos;
    const FfxInt32x2 iMotionVectorSize = RenderSize();
#else
    const FfxInt32x2 iDivergencePos    = ComputeHrPosFromLrPos(iPxLrPos);
    const FfxInt32x2 iMotionVectorPos  = ComputeHrPosFromLrPos(iNearestDepthPos);
    const FfxInt32x2 iMotionVectorSize = DisplaySize();
#endif

    const FfxFloat32x2 fNearestMotionVector = LoadInputMotionVector(iMotionVectorPos);

    StoreDilatedDepth(iPxLrPos, fNearestDepth);
    StoreDilatedMotionVector(iPxLrPos, fNearestMotionVector);

    ReconstructPrevDepth(iPxLrPos, fNearestDepth, fNearestMotionVector, RenderSize());

    const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iDivergencePos, iMotionVectorSize);

    PreProcessReactiveMasks(iPxLrPos, fMotionDivergence);
}
||||
|
|
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 13cd33c3d34a317409049dfd939e64ef |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,68 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
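// FSR2 pass 2
// SRV  2 : m_MotionVector                      : r_motion_vectors
// SRV  3 : m_depthbuffer                       : r_depth
// UAV  7 : FSR2_ReconstructedPrevNearestDepth  : rw_reconstructed_previous_nearest_depth
// UAV  8 : FSR2_DilatedVelocity                : rw_dilated_motion_vectors
// UAV  9 : FSR2_DilatedDepth                   : rw_dilatedDepth
// CB   0 : cbFSR2
//
// NOTE(review): the slot numbers listed above do not match the FSR2_BIND_*
// indices defined below, and the list omits the reactive mask,
// transparency-and-composition mask, prepared input color and dilated
// reactive masks bindings that this file also defines. Prefer the
// FSR2_BIND_* defines when in doubt -- TODO confirm and refresh this list.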
||||
|
|
||||
|
#version 450 |
||||
|
|
||||
|
#extension GL_GOOGLE_include_directive : require |
||||
|
#extension GL_EXT_samplerless_texture_functions : require |
||||
|
|
||||
|
#define FSR2_BIND_SRV_MOTION_VECTORS 0 |
||||
|
#define FSR2_BIND_SRV_DEPTH 1 |
||||
|
#define FSR2_BIND_SRV_REACTIVE_MASK 2 |
||||
|
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 |
||||
|
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 |
||||
|
#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6 |
||||
|
#define FSR2_BIND_UAV_DILATED_DEPTH 7 |
||||
|
#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 8 |
||||
|
#define FSR2_BIND_CB_FSR2 9 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_glsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" |
||||
|
|
||||
|
// Thread-group dimensions default to 8x8x1; each value, and the resulting
// layout declaration, can be overridden by defining the macro beforehand.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
#endif // #if !defined(FFX_FSR2_NUM_THREADS)

FFX_FSR2_NUM_THREADS
// Compute entry point: one invocation per pixel, addressed by the global
// invocation id.
void main()
{
    const FFX_MIN16_I2 iPxLrPos = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
    ReconstructAndDilate(iPxLrPos);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: e3f9c69c87e772f4387b1bd3363a8688 |
||||
|
DefaultImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,70 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
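// FSR2 pass 2
// SRV  2 : m_MotionVector                      : r_motion_vectors
// SRV  3 : m_depthbuffer                       : r_depth
// UAV  7 : FSR2_ReconstructedPrevNearestDepth  : rw_reconstructed_previous_nearest_depth
// UAV  8 : FSR2_DilatedVelocity                : rw_dilated_motion_vectors
// UAV  9 : FSR2_DilatedDepth                   : rw_dilatedDepth
// CB   0 : cbFSR2
//
// NOTE(review): the slot numbers listed above do not match the FSR2_BIND_*
// indices defined below, and the list omits the reactive mask,
// transparency-and-composition mask, prepared input color and dilated
// reactive masks bindings that this file also defines. Prefer the
// FSR2_BIND_* defines when in doubt -- TODO confirm and refresh this list.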
||||
|
|
||||
|
#define FSR2_BIND_SRV_MOTION_VECTORS 0 |
||||
|
#define FSR2_BIND_SRV_DEPTH 1 |
||||
|
#define FSR2_BIND_SRV_REACTIVE_MASK 2 |
||||
|
#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 |
||||
|
#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 |
||||
|
#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 |
||||
|
#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 1 |
||||
|
#define FSR2_BIND_UAV_DILATED_DEPTH 2 |
||||
|
#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 3 |
||||
|
#define FSR2_BIND_CB_FSR2 0 |
||||
|
|
||||
|
#include "ffx_fsr2_callbacks_hlsl.h" |
||||
|
#include "ffx_fsr2_common.h" |
||||
|
#include "ffx_fsr2_sample.h" |
||||
|
#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" |
||||
|
|
||||
|
// Thread-group dimensions default to 8x8x1; each value, and the resulting
// numthreads attribute, can be overridden by defining the macro beforehand.
#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
#define FFX_FSR2_THREAD_GROUP_WIDTH 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)

#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)

#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
#define FFX_FSR2_THREAD_GROUP_DEPTH 1
#endif // #if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)

#if !defined(FFX_FSR2_NUM_THREADS)
#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)]
#endif // #if !defined(FFX_FSR2_NUM_THREADS)

// Compute entry point: one thread per pixel. Only the dispatch thread id is
// used; the other system values are declared but not read.
FFX_FSR2_PREFER_WAVE64
FFX_FSR2_NUM_THREADS
FFX_FSR2_EMBED_ROOTSIG_CONTENT
void CS(int2 iGroupId          : SV_GroupID,
        int2 iDispatchThreadId : SV_DispatchThreadID,
        int2 iGroupThreadId    : SV_GroupThreadID,
        int  iGroupIndex       : SV_GroupIndex)
{
    ReconstructAndDilate(iDispatchThreadId);
}
||||
@ -0,0 +1,7 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 9355c255c8505ae48ae89af286943747 |
||||
|
ShaderIncludeImporter: |
||||
|
externalObjects: {} |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,125 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_REPROJECT_H |
||||
|
#define FFX_FSR2_REPROJECT_H |
||||
|
|
||||
|
#ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE |
||||
|
#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 // Approximate |
||||
|
#endif |
||||
|
|
||||
|
// Fetch adapter handed to the sample-declaration macros: returns the history
// value at integer pixel iPxSample.
FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
{
    const FfxFloat32x4 fHistory = LoadHistory(iPxSample);
    return fHistory;
}

#if FFX_HALF
// Reduced-precision overload: same fetch, converted to FFX_MIN16_F4.
FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
{
    const FFX_MIN16_F4 fHistory = FFX_MIN16_F4(LoadHistory(iPxSample));
    return fHistory;
}
#endif
||||
|
|
||||
|
|
||||
|
// Declares FetchHistorySamples() and HistorySample(): a bicubic-footprint
// fetch through WrapHistory, filtered with the Lanczos variant selected by
// FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE. The Min16 declarations are used
// only when both half-precision switches are enabled.
#if !(FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF)
DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
#else
DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
#endif
||||
|
|
||||
|
// Fetch adapter handed to the sample-declaration macros: returns the lock
// status at integer pixel iPxSample, widened to four components with the
// last component set to 0.
FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
{
    const FfxFloat32x4 fLockStatus = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f);
    return fLockStatus;
}

#if FFX_HALF
// Reduced-precision overload: same fetch, built as FFX_MIN16_F4.
FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
{
    const FFX_MIN16_F4 fLockStatus = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0f);
    return fLockStatus;
}
#endif
||||
|
|
||||
|
// Declares FetchLockStatusSamples() and LockStatusSample().
// The `#if 1` selects bilinear filtering; the `#else` branch keeps the
// bicubic/Lanczos alternative available but compiled out. Within each branch
// the Min16 declarations are used only when both half-precision switches are
// enabled.
#if 1
#if !(FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF)
DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
#else
DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
#endif
#else
#if !(FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF)
DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
#else
DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
#endif
#endif
||||
|
|
||||
|
// Returns the motion vector to use for a high-resolution pixel.
//
// With FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS the dilated motion
// vector is loaded at fHrUv * RenderSize() (converted to an integer
// position); otherwise the input motion vector is loaded at iPxHrPos.
//
//   iPxHrPos - high-resolution pixel position
//   fHrUv    - UV of that pixel
FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
{
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize()));
#else
    const FfxFloat32x2 fMotionVector = LoadInputMotionVector(iPxHrPos);
#endif
    return fMotionVector;
}
||||
|
|
||||
|
// Computes where a high-resolution pixel was in the previous frame.
//
//   iPxHrPos          - high-resolution pixel position
//   fMotionVector     - offset added to the pixel-centre UV
//   fReprojectedHrUv  - [out] pixel-centre UV plus fMotionVector
//   bIsExistingSample - [out] true when both components of the reprojected UV
//                       lie in the closed range [0, 1]
void ComputeReprojectedUVs(FfxInt32x2 iPxHrPos, FfxFloat32x2 fMotionVector, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
{
    const FfxFloat32x2 fCenterUv = (iPxHrPos + 0.5f) / DisplaySize();
    fReprojectedHrUv = fCenterUv + fMotionVector;

    const FfxBoolean bInsideX = (fReprojectedHrUv.x >= 0.0f && fReprojectedHrUv.x <= 1.0f);
    const FfxBoolean bInsideY = (fReprojectedHrUv.y >= 0.0f && fReprojectedHrUv.y <= 1.0f);
    bIsExistingSample = bInsideX && bInsideY;
}
||||
|
|
||||
|
// Samples the history buffer at the reprojected UV and converts its colour
// for accumulation.
//
// The rgb part of the sample is multiplied by Exposure(), passed through
// Tonemap() when FFX_FSR2_OPTION_HDR_COLOR_INPUT is set, and finally
// converted with RGBToYCoCg(). The fourth component is passed through
// unchanged.
//
//   iPxHrPos               - high-resolution pixel position (not read here)
//   fReprojectedHrUv       - UV at which the history is sampled
//   fHistoryColorAndWeight - [out] converted colour in .rgb, sampled .a
void ReprojectHistoryColor(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x4 fHistoryColorAndWeight)
{
    const FfxFloat32x4 fHistorySample = HistorySample(fReprojectedHrUv, DisplaySize());

    FfxFloat32x3 fColor = fHistorySample.rgb * Exposure();

#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
    fColor = Tonemap(fColor);
#endif

    fHistoryColorAndWeight = FfxFloat32x4(RGBToYCoCg(fColor), fHistorySample.a);
}
||||
|
|
||||
|
// Samples the lock status at the reprojected UV, keeping the in-place lock
// lifetime when that lifetime is negative.
//
//   iPxHrPos               - pixel whose in-place (read/write) lock status is read
//   fReprojectedHrUv       - UV at which the lock status is sampled
//   fReprojectedLockStatus - [out] sampled lock status, with its
//                            LOCK_LIFETIME_REMAINING component replaced by the
//                            in-place value when that value is below zero
void ReprojectHistoryLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x3 fReprojectedLockStatus)
{
    fReprojectedLockStatus = SampleLockStatus(fReprojectedHrUv);

    // If function is called from Accumulate pass, we need to treat locks differently
    const FfxFloat32 fCurrentLifetime = LoadRwLockStatus(iPxHrPos)[LOCK_LIFETIME_REMAINING];

    // Keep lifetime if new lock
    const FfxBoolean bKeepInPlaceLifetime = fCurrentLifetime < 0.0f;
    if (bKeepInPlaceLifetime) {
        fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] = fCurrentLifetime;
    }
}
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_REPROJECT_H ) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 63fc917ca6895cb4aac237ea35edb838 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -1,5 +1,5 @@ |
|||||
fileFormatVersion: 2 |
fileFormatVersion: 2 |
||||
guid: f54305f51c7d4314e8f18b6470ca188e |
|
||||
|
guid: fd5bed2bf4ba07444ae815390168a15d |
||||
PluginImporter: |
PluginImporter: |
||||
externalObjects: {} |
externalObjects: {} |
||||
serializedVersion: 2 |
serializedVersion: 2 |
||||
@ -0,0 +1,602 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_SAMPLE_H |
||||
|
#define FFX_FSR2_SAMPLE_H |
||||
|
|
||||
|
// suppress warnings |
||||
|
#ifdef FFX_HLSL |
||||
|
#pragma warning(disable: 4008) // potentially divide by zero |
||||
|
#endif //FFX_HLSL |
||||
|
|
||||
|
// 2x2 sample footprint. Members are named fColor<X><Y>: the first digit is the
// horizontal index, the second the vertical index.
struct FetchedBilinearSamples {

    FfxFloat32x4 fColor00, fColor10;    // row 0
    FfxFloat32x4 fColor01, fColor11;    // row 1
};

// 4x4 sample footprint, same fColor<X><Y> naming as FetchedBilinearSamples.
struct FetchedBicubicSamples {

    FfxFloat32x4 fColor00, fColor10, fColor20, fColor30;    // row 0
    FfxFloat32x4 fColor01, fColor11, fColor21, fColor31;    // row 1
    FfxFloat32x4 fColor02, fColor12, fColor22, fColor32;    // row 2
    FfxFloat32x4 fColor03, fColor13, fColor23, fColor33;    // row 3
};
||||
|
|
||||
|
// Reduced-precision variants of the sample footprints. When FFX_HALF is off
// the Min16 names simply alias the full-precision structs.
#if FFX_HALF
// 2x2 footprint, members named fColor<X><Y> (horizontal index, vertical index).
struct FetchedBilinearSamplesMin16 {

    FFX_MIN16_F4 fColor00, fColor10;    // row 0
    FFX_MIN16_F4 fColor01, fColor11;    // row 1
};

// 4x4 footprint, same naming.
struct FetchedBicubicSamplesMin16 {

    FFX_MIN16_F4 fColor00, fColor10, fColor20, fColor30;    // row 0
    FFX_MIN16_F4 fColor01, fColor11, fColor21, fColor31;    // row 1
    FFX_MIN16_F4 fColor02, fColor12, fColor22, fColor32;    // row 2
    FFX_MIN16_F4 fColor03, fColor13, fColor23, fColor33;    // row 3
};
#else //FFX_HALF
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
#endif //FFX_HALF
||||
|
|
||||
|
// Linear interpolation from A (t = 0) to B (t = 1).
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    const FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}

// Bilinear interpolation of a 2x2 footprint: interpolate each row along x,
// then blend the two rows along y.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    const FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    const FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
||||
|
|
||||
|
#if FFX_HALF
// Reduced-precision linear interpolation from A (t = 0) to B (t = 1).
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
{
    const FFX_MIN16_F4 fDelta = B - A;
    return A + fDelta * t;
}

// Reduced-precision bilinear interpolation of a 2x2 footprint: interpolate
// each row along x, then blend the two rows along y.
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
{
    const FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    const FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
#endif
||||
|
|
||||
|
// Evaluates sinc(x) * sinc(x / 2) with sinc(v) = sin(PI * v) / (PI * v),
// returning 1 when |x| is below FSR2_EPSILON to avoid the 0/0 at the origin.
// x is not range-limited here.
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

    if (abs(x) < FSR2_EPSILON) {
        return 1.f;
    }

    const FfxFloat32 fPiX     = PI * x;
    const FfxFloat32 fHalfPiX = 0.5f * PI * x;
    return (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
}

// Same kernel, evaluated at min(|x|, 2).
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
}
||||
|
|
||||
|
#if FFX_HALF
// Reduced-precision sinc(x) * sinc(x / 2); returns 1 when |x| is below
// FSR2_EPSILON. x is not range-limited here.
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
{
    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants

    if (abs(x) < FFX_MIN16_F(FSR2_EPSILON)) {
        return FFX_MIN16_F(1.f);
    }

    const FFX_MIN16_F fPiX     = PI * x;
    const FFX_MIN16_F fHalfPiX = FFX_MIN16_F(0.5f) * PI * x;
    return (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
}

// Same kernel, evaluated at min(|x|, 2).
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
    return Lanczos2NoClamp(ffxMin(abs(x), FFX_MIN16_F(2.0f)));
}
#endif //FFX_HALF
||||
|
|
||||
|
// FSR1 lanczos approximation, evaluated as a polynomial in the squared
// distance. Input is x*x and must be <= 4; no clamping is applied here.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    const FfxFloat32 fScale = 25.0f / 16.0f;
    const FfxFloat32 fBias  = 25.0f / 16.0f - 1;

    const FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
    const FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;

    return (fScale * fTermA * fTermA - fBias) * (fTermB * fTermB);
}

#if FFX_HALF
// Reduced-precision overload of the same polynomial.
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
{
    const FFX_MIN16_F fScale = FFX_MIN16_F(25.0f / 16.0f);
    const FFX_MIN16_F fBias  = FFX_MIN16_F(25.0f / 16.0f - 1);

    const FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
    const FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);

    return (fScale * fTermA * fTermA - fBias) * (fTermB * fTermB);
}
#endif //FFX_HALF
||||
|
|
||||
|
// Lanczos approximation on a squared distance, with the input limited to at
// most 4 before evaluation.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}

#if FFX_HALF
// Reduced-precision overload.
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
}
#endif //FFX_HALF
||||
|
|
||||
|
// Lanczos approximation on a (non-squared) distance x; squares x and
// evaluates the polynomial without limiting the range.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    const FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSqNoClamp(fSquared);
}

#if FFX_HALF
// Reduced-precision overload.
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
{
    const FFX_MIN16_F fSquared = x * x;
    return Lanczos2ApproxSqNoClamp(fSquared);
}
#endif //FFX_HALF
||||
|
|
||||
|
// Lanczos approximation on a (non-squared) distance x; squares x and
// evaluates the range-limited polynomial.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    const FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSq(fSquared);
}

#if FFX_HALF
// Reduced-precision overload.
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
{
    const FFX_MIN16_F fSquared = x * x;
    return Lanczos2ApproxSq(fSquared);
}
#endif //FFX_HALF
||||
|
|
||||
|
// Lanczos weight obtained through SampleLanczos2Weight() at |x|.
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    const FfxFloat32 fDistance = abs(x);
    return SampleLanczos2Weight(fDistance);
}

#if FFX_HALF
// Reduced-precision overload; the looked-up weight is converted to FFX_MIN16_F.
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
    const FFX_MIN16_F fDistance = abs(x);
    return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
}
#endif //FFX_HALF
||||
|
|
||||
|
// Four-tap filter using Lanczos2_UseLUT() weights. The taps sit at offsets
// -1, 0, +1 and +2 relative to the base sample and t is the fractional
// position between taps 1 and 2. The result is divided by the weight sum.
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    const FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    const FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    const FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    const FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

    const FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    const FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
#if FFX_HALF
// Reduced-precision overload of the four-tap filter.
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    const FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
    const FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
    const FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
    const FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);

    const FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    const FFX_MIN16_F  fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
#endif
||||
|
|
||||
|
// Four-tap Lanczos2 filter. The taps sit at offsets -1, 0, +1 and +2 relative
// to the base sample and t is the fractional position between taps 1 and 2.
// The result is divided by the weight sum.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    const FfxFloat32 fW0 = Lanczos2(-1.f - t);
    const FfxFloat32 fW1 = Lanczos2(-0.f - t);
    const FfxFloat32 fW2 = Lanczos2(+1.f - t);
    const FfxFloat32 fW3 = Lanczos2(+2.f - t);

    const FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    const FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
||||
|
|
||||
|
// Separable Lanczos2 filter over a 4x4 footprint, followed by deringing.
//
// Each of the four rows is filtered along x with fPxFrac.x, the four row
// results are filtered along y with fPxFrac.y, and the outcome is clamped
// per component to the min/max of the four central samples
// (fColor11, fColor21, fColor12, fColor22).
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass, one result per row.
    const FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    const FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    const FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    const FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass.
    const FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing
    // TODO: only use 4 by checking jitter
    const FfxFloat32x4 fDeringingMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    const FfxFloat32x4 fDeringingMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fDeringingMin, fDeringingMax);
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Reduced-precision variant: blends four consecutive taps using Lanczos2 kernel weights.
//   fColor0..fColor3 - taps sitting at offsets -1, 0, +1 and +2 along the filtered axis.
//   t                - sampling position, measured from the tap fColor1.
// Returns the weighted sum of the taps divided by the sum of the four weights.
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    // Tap offsets are cast to the min16 type before subtracting t.
    FFX_MIN16_F fTapWeightA = Lanczos2(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fTapWeightB = Lanczos2(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fTapWeightC = Lanczos2(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fTapWeightD = Lanczos2(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fTapWeightA * fColor0 + fTapWeightB * fColor1 + fTapWeightC * fColor2 + fTapWeightD * fColor3;
    FFX_MIN16_F  fWeightTotal = fTapWeightA + fTapWeightB + fTapWeightC + fTapWeightD;

    return fWeightedSum / fWeightTotal;
}
||||
|
|
||||
|
// Reduced-precision separable Lanczos2 reconstruction over a fetched 4x4 footprint, with deringing.
//   Samples - the 16 fetched taps (fColorXY: X = column index, Y = row index).
//   fPxFrac - per-axis sampling position passed to the 1D filter.
// Returns the filtered colour clamped to the component-wise range of the four centre taps.
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: collapse each row of four taps into a single colour.
    FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass over the four row results.
    FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the 2x2 centre taps.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
||||
|
#endif //FFX_HALF |
||||
|
|
||||
|
|
||||
|
// Separable reconstruction over a fetched 4x4 footprint using Lanczos2_UseLUT for the 1D passes,
// followed by deringing.
//   Samples - the 16 fetched taps (fColorXY: X = column index, Y = row index).
//   fPxFrac - per-axis sampling position passed to the 1D filter.
// Returns the filtered colour clamped to the component-wise range of the four centre taps.
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass: collapse each row of four taps into a single colour.
    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass over the four row results.
    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the 2x2 centre taps.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Reduced-precision separable reconstruction over a fetched 4x4 footprint using Lanczos2_UseLUT
// for the 1D passes, followed by deringing.
//   Samples - the 16 fetched taps (fColorXY: X = column index, Y = row index).
//   fPxFrac - per-axis sampling position passed to the 1D filter.
// Returns the filtered colour clamped to the component-wise range of the four centre taps.
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: collapse each row of four taps into a single colour.
    FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass over the four row results.
    FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the 2x2 centre taps.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
||||
|
#endif //FFX_HALF |
||||
|
|
||||
|
|
||||
|
|
||||
|
// Blends four consecutive taps using weights from Lanczos2ApproxNoClamp.
//   fColor0..fColor3 - taps sitting at offsets -1, 0, +1 and +2 along the filtered axis.
//   t                - sampling position, measured from the tap fColor1.
// Returns the weighted sum of the taps divided by the sum of the four weights.
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    // Each weight is the approximate kernel evaluated at (tap offset - t).
    const FfxFloat32 fTapWeightA = Lanczos2ApproxNoClamp(-1.f - t);
    const FfxFloat32 fTapWeightB = Lanczos2ApproxNoClamp(-0.f - t);
    const FfxFloat32 fTapWeightC = Lanczos2ApproxNoClamp(+1.f - t);
    const FfxFloat32 fTapWeightD = Lanczos2ApproxNoClamp(+2.f - t);

    const FfxFloat32x4 fWeightedSum = fTapWeightA * fColor0 + fTapWeightB * fColor1 + fTapWeightC * fColor2 + fTapWeightD * fColor3;
    const FfxFloat32   fWeightTotal = fTapWeightA + fTapWeightB + fTapWeightC + fTapWeightD;

    return fWeightedSum / fWeightTotal;
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Reduced-precision variant: blends four consecutive taps using weights from Lanczos2ApproxNoClamp.
//   fColor0..fColor3 - taps sitting at offsets -1, 0, +1 and +2 along the filtered axis.
//   t                - sampling position, measured from the tap fColor1.
// Returns the weighted sum of the taps divided by the sum of the four weights.
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    // Tap offsets are cast to the min16 type before subtracting t.
    FFX_MIN16_F fTapWeightA = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fTapWeightB = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fTapWeightC = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fTapWeightD = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fTapWeightA * fColor0 + fTapWeightB * fColor1 + fTapWeightC * fColor2 + fTapWeightD * fColor3;
    FFX_MIN16_F  fWeightTotal = fTapWeightA + fTapWeightB + fTapWeightC + fTapWeightD;

    return fWeightedSum / fWeightTotal;
}
||||
|
#endif //FFX_HALF |
||||
|
|
||||
|
// Separable reconstruction over a fetched 4x4 footprint using the approximate 1D Lanczos2 blend,
// followed by deringing.
//   Samples - the 16 fetched taps (fColorXY: X = column index, Y = row index).
//   fPxFrac - per-axis sampling position passed to the 1D filter.
// Returns the filtered colour clamped to the component-wise range of the four centre taps.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass: collapse each row of four taps into a single colour.
    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass over the four row results.
    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the 2x2 centre taps.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Reduced-precision separable reconstruction over a fetched 4x4 footprint using the approximate
// 1D Lanczos2 blend, followed by deringing.
//   Samples - the 16 fetched taps (fColorXY: X = column index, Y = row index).
//   fPxFrac - per-axis sampling position passed to the 1D filter.
// Returns the filtered colour clamped to the component-wise range of the four centre taps.
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass: collapse each row of four taps into a single colour.
    FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass over the four row results.
    FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the 2x2 centre taps.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FFX_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
||||
|
#endif |
||||
|
|
||||
|
// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. |
||||
|
// Returns iPxSample + iPxOffset, clamped only on the side the offset moves towards:
// a negative offset component is clamped against 0, a positive one against (iTextureSize - 1),
// and a zero component leaves that axis untouched.
// Relies on iPxSample already being in range (see the comment preceding this function).
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iCoord = iPxSample + iPxOffset;

    // Horizontal axis.
    if (iPxOffset.x < 0) {
        iCoord.x = ffxMax(iCoord.x, 0);
    } else if (iPxOffset.x > 0) {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
    }

    // Vertical axis.
    if (iPxOffset.y < 0) {
        iCoord.y = ffxMax(iCoord.y, 0);
    } else if (iPxOffset.y > 0) {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
    }

    return iCoord;
}
||||
|
#if FFX_HALF |
||||
|
// Min16-integer variant: returns iPxSample + iPxOffset, clamped only on the side the offset moves
// towards. A negative offset component is clamped against 0, a positive one against
// (iTextureSize - 1), and a zero component leaves that axis untouched.
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
    FFX_MIN16_I2 iCoord = iPxSample + iPxOffset;

    // Horizontal axis.
    if (iPxOffset.x < FFX_MIN16_I(0)) {
        iCoord.x = ffxMax(iCoord.x, FFX_MIN16_I(0));
    } else if (iPxOffset.x > FFX_MIN16_I(0)) {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FFX_MIN16_I(1));
    }

    // Vertical axis.
    if (iPxOffset.y < FFX_MIN16_I(0)) {
        iCoord.y = ffxMax(iCoord.y, FFX_MIN16_I(0));
    } else if (iPxOffset.y > FFX_MIN16_I(0)) {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FFX_MIN16_I(1));
    }

    return iCoord;
}
||||
|
#endif //FFX_HALF |
||||
|
|
||||
|
|
||||
|
// Emits a function `SampleType Name(AddrType iPxSample, AddrType iTextureSize)` that loads the 4x4
// footprint covering offsets -1..+2 on both axes around iPxSample. Every address goes through
// ClampCoord, every loaded value is converted with TextureType(...), and field fColorXY receives
// the texel at column offset (X - 1), row offset (Y - 1).
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)       \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                               \
    {                                                                                                        \
        SampleType Taps;                                                                                     \
                                                                                                             \
        /* row offset -1 */                                                                                  \
        Taps.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize)));     \
        Taps.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize)));     \
        Taps.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize)));     \
        Taps.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize)));     \
                                                                                                             \
        /* row offset 0 */                                                                                   \
        Taps.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize)));     \
        Taps.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));     \
        Taps.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));     \
        Taps.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize)));     \
                                                                                                             \
        /* row offset +1 */                                                                                  \
        Taps.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize)));     \
        Taps.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));     \
        Taps.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));     \
        Taps.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize)));     \
                                                                                                             \
        /* row offset +2 */                                                                                  \
        Taps.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize)));     \
        Taps.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize)));     \
        Taps.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize)));     \
        Taps.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize)));     \
                                                                                                             \
        return Taps;                                                                                         \
    }
||||
|
|
||||
|
// Full-precision bicubic fetch: FetchedBicubicSamples result, FfxFloat32x4 texels, FfxInt32x2 addresses.
#define DeclareCustomFetchBicubicSamples(FetchName, LoadFn) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, FetchName, LoadFn)
||||
|
|
||||
|
// Min16 bicubic fetch: FetchedBicubicSamplesMin16 result, FFX_MIN16_F4 texels, FfxInt32x2 addresses.
#define DeclareCustomFetchBicubicSamplesMin16(FetchName, LoadFn) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, FetchName, LoadFn)
||||
|
|
||||
|
// Emits a function `SampleType Name(AddrType iPxSample, AddrType iTextureSize)` that loads the 2x2
// footprint at offsets 0..+1 on both axes from iPxSample. Every address goes through ClampCoord
// and every loaded value is converted with TextureType(...).
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)      \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                               \
    {                                                                                                        \
        SampleType Taps;                                                                                     \
        /* row offset 0 */                                                                                   \
        Taps.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));     \
        Taps.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));     \
        /* row offset +1 */                                                                                  \
        Taps.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));     \
        Taps.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));     \
        return Taps;                                                                                         \
    }
||||
|
|
||||
|
// Full-precision bilinear fetch: FetchedBilinearSamples result, FfxFloat32x4 texels, FfxInt32x2 addresses.
#define DeclareCustomFetchBilinearSamples(FetchName, LoadFn) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, FetchName, LoadFn)
||||
|
|
||||
|
// Min16 bilinear fetch: FetchedBilinearSamplesMin16 result, FFX_MIN16_F4 texels, FfxInt32x2 addresses.
#define DeclareCustomFetchBilinearSamplesMin16(FetchName, LoadFn) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, FetchName, LoadFn)
||||
|
|
||||
|
// BE CAREFUL: precision issues are common here, e.g. a sample at (3253, 125) evaluating to (3252.9989778, 125.001102), |
||||
|
// so iPxSample can "jitter". |
||||
|
// Emits `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`:
//   1. converts the UV to a texel-space position (uv * size - 0.5),
//   2. takes its floor as the base texel, clamped per axis to [0, size - 1],
//   3. takes the fractional part of the unclamped position,
//   4. returns InterpolateSamples(FetchSamples(base, size), fraction) converted to FfxFloat32x4.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                      \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                          \
    {                                                                                                           \
        FfxFloat32x2 fTexelPos  = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);            \
        FfxFloat32x2 fFraction  = ffxFract(fTexelPos);                                                          \
        FfxInt32x2   iBaseTexel = FfxInt32x2(floor(fTexelPos));                                                 \
        /* Keep the base texel inside the texture */                                                            \
        iBaseTexel.x = ffxMax(0, ffxMin(iBaseTexel.x, iTextureSize.x - 1));                                     \
        iBaseTexel.y = ffxMax(0, ffxMin(iBaseTexel.y, iTextureSize.y - 1));                                     \
        return FfxFloat32x4(InterpolateSamples(FetchSamples(iBaseTexel, iTextureSize), fFraction));             \
    }
||||
|
|
||||
|
// Emits `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`:
//   1. converts the UV to a texel-space position (uv * size - 0.5) at full precision,
//   2. takes its floor as the base texel, clamped per axis to [0, size - 1],
//   3. takes the fractional part of the unclamped position and converts it to FFX_MIN16_F2,
//   4. returns InterpolateSamples(FetchSamples(base, size), fraction) converted to FFX_MIN16_F4.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                 \
    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                          \
    {                                                                                                           \
        FfxFloat32x2 fTexelPos  = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f);            \
        FFX_MIN16_F2 fFraction  = FFX_MIN16_F2(ffxFract(fTexelPos));                                            \
        FfxInt32x2   iBaseTexel = FfxInt32x2(floor(fTexelPos));                                                 \
        /* Keep the base texel inside the texture */                                                            \
        iBaseTexel.x = ffxMax(0, ffxMin(iBaseTexel.x, iTextureSize.x - 1));                                     \
        iBaseTexel.y = ffxMax(0, ffxMin(iBaseTexel.y, iTextureSize.y - 1));                                     \
        return FFX_MIN16_F4(InterpolateSamples(FetchSamples(iBaseTexel, iTextureSize), fFraction));             \
    }
||||
|
|
||||
|
// Token pasting goes through two macro levels so that arguments are macro-expanded before `##`.
#define FFX_FSR2_CONCAT_ID(lhs, rhs) lhs ## rhs
#define FFX_FSR2_CONCAT(lhs, rhs) FFX_FSR2_CONCAT_ID(lhs, rhs)

// Sampler functions addressable by numeric index.
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

// Resolves index 0/1/2 to Lanczos2 / Lanczos2LUT / Lanczos2Approx.
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(index) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, index)
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_SAMPLE_H ) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: ba3ee190167b6c240aaeb1f8f4dbcb67 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,214 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifndef FFX_FSR2_UPSAMPLE_H |
||||
|
#define FFX_FSR2_UPSAMPLE_H |
||||
|
|
||||
|
// When non-zero, ComputeUpsampledColorAndWeight floors its returned weight at FSR2_EPSILON
// instead of 0.
#define FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT 0 |
||||
|
|
||||
|
// Size of the fSamples array in ComputeUpsampledColorAndWeight, which is filled by a 4x4 load loop.
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; |
||||
|
|
||||
|
// Clamps fColor in place to the [aabbMin, aabbMax] range stored in clippingBox.
//   clippingBox - read only; supplies the lower and upper bounds.
//   fColor      - in/out colour, overwritten with the clamped value.
void Deringing(RectificationBoxData clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) |
||||
|
{ |
||||
|
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); |
||||
|
} |
||||
|
#if FFX_HALF |
||||
|
// Min16 variant: clamps fColor in place to the [aabbMin, aabbMax] range stored in clippingBox.
//   clippingBox - read only; supplies the lower and upper bounds.
//   fColor      - in/out colour, overwritten with the clamped value.
void Deringing(RectificationBoxDataMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) |
||||
|
{ |
||||
|
fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); |
||||
|
} |
||||
|
#endif |
||||
|
|
||||
|
// Default kernel selector for GetUpsampleLanczosWeight; only applies when not defined earlier.
#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE |
||||
|
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 1 // Labelled "Approximate". NOTE(review): value 1 selects Lanczos2_UseLUT in the FfxFloat32 overload of GetUpsampleLanczosWeight and Lanczos2ApproxSq only in the FFX_HALF overload - confirm which is intended.
||||
|
#endif |
||||
|
|
||||
|
// Kernel weight for one source tap.
//   fSrcSampleOffset - offset of the tap from the output position.
//   fKernelWeight    - per-axis scale applied to the offset before the kernel is evaluated.
// The kernel is chosen at compile time by FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE:
//   0 -> Lanczos2 on the offset length, 1 -> Lanczos2_UseLUT on the offset length,
//   2 -> Lanczos2ApproxSq on the squared offset length. Any other value is a compile error.
// NOTE(review): the FFX_HALF overload of this function maps 1 and 2 the other way round - confirm.
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 fKernelWeight)
{
    const FfxFloat32x2 fScaledOffset = fSrcSampleOffset * fKernelWeight;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0  // LANCZOS_TYPE_REFERENCE
    return Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1  // LANCZOS_TYPE_LUT
    return Lanczos2_UseLUT(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2  // LANCZOS_TYPE_APPROXIMATE
    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
#else
#error "Invalid Lanczos type"
#endif
}
||||
|
|
||||
|
#if FFX_HALF |
||||
|
// Min16 kernel weight for one source tap.
//   fSrcSampleOffset - offset of the tap from the output position.
//   fKernelWeight    - per-axis scale applied to the offset before the kernel is evaluated.
// The kernel is chosen at compile time by FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE:
//   0 -> Lanczos2 on the offset length, 1 -> Lanczos2ApproxSq on the squared offset length,
//   2 -> Lanczos2_UseLUT on the offset length. Any other value is a compile error.
// NOTE(review): the FfxFloat32 overload of this function maps 1 and 2 the other way round - confirm.
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F2 fKernelWeight)
{
    FFX_MIN16_F2 fScaledOffset = fSrcSampleOffset * fKernelWeight;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0  // LANCZOS_TYPE_REFERENCE
    FFX_MIN16_F fTapWeight = Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1  // LANCZOS_TYPE_APPROXIMATE
    FFX_MIN16_F fTapWeight = Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2  // LANCZOS_TYPE_LUT
    FFX_MIN16_F fTapWeight = Lanczos2_UseLUT(length(fScaledOffset));
    // Untested idea: a LUT indexed by squared length (Lanczos2Sq_UseLUT on the dot product)
    // would save the reciprocal square root.
#else
#error "Invalid Lanczos type"
#endif

    return fTapWeight;
}
||||
|
#endif |
||||
|
|
||||
|
// Returns x cubed, evaluated as (x * x) * x.
FfxFloat32 Pow3(FfxFloat32 x)
{
    const FfxFloat32 fSquared = x * x;
    return fSquared * x;
}
||||
|
|
||||
|
// Min16 overload of Pow3: returns x cubed.
// The guard previously read `FX_HALF`, an identifier used nowhere else in this file; an undefined
// macro evaluates to 0 in #if, so this overload was never compiled. It now uses FFX_HALF, the
// guard on every other min16 overload here.
#if FFX_HALF
FFX_MIN16_F Pow3(FFX_MIN16_F x)
{
    return x * x * x;
}
#endif
||||
|
|
||||
|
// Upsamples the prepared input colour for one output pixel and builds the rectification box
// from the surrounding source taps.
//   iPxHrPos      - coordinate of the output pixel.
//   fKernelWeight - per-axis scale handed to GetUpsampleLanczosWeight for every tap.
//   clippingBox   - in/out; overwritten with the data of the rectification box built here.
// Returns (rgb, weight): rgb is the weight-normalised colour clamped to the box AABB; weight is
// the accumulated kernel weight (replaced by averageLanczosWeightPerFrame when any component of
// fKernelWeight is below 1), floored at 0, or at FSR2_EPSILON when
// FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT is non-zero.
FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fKernelWeight, FFX_PARAMETER_INOUT RectificationBoxData clippingBox)
{
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
#endif
    // A sliced Lanczos filter with 2 lobes is evaluated here (other slices are accumulated temporally).
    FfxFloat32x2 fHrPixelCenter = FfxFloat32x2(iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);  // output pixel centre, destination resolution
    FfxFloat32x2 fLrPixelCenter = fHrPixelCenter * DownscaleFactor();                       // same point, source resolution
    FfxInt32x2   iLrBaseTexel   = FfxInt32x2(floor(fLrPixelCenter));                        // TODO: what about weird upscale factors...

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
    RectificationBoxMin16 rectBox;
#else
    RectificationBox rectBox;
#endif

    FfxFloat32x3 fTaps[iLanczos2SampleCount];

    // Un-jittered position of the source sample at offset (0, 0).
    FfxFloat32x2 fBaseTexelPos = (FfxFloat32x2(iLrBaseTexel) + FfxFloat32x2(0.5f, 0.5f)) - Jitter();

    // When the base sample lies past the output position on an axis, the footprint starts at -2
    // instead of -1 and the tap order on that axis is mirrored. Mirroring keeps the taps used for
    // the rectification box in rows/columns 0..2 of the array, so the selection cost is paid on
    // positions and weights rather than on the sampled colours.
    const FfxBoolean bMirrorRows = fBaseTexelPos.y > fLrPixelCenter.y;
    const FfxBoolean bMirrorCols = fBaseTexelPos.x > fLrPixelCenter.x;

    FfxInt32x2 iTopLeft;
    iTopLeft.x = bMirrorCols ? FfxInt32(-2) : FfxInt32(-1);
    iTopLeft.y = bMirrorRows ? FfxInt32(-2) : FfxInt32(-1);

    FfxFloat32x2 fTopLeft = FfxFloat32x2(iTopLeft);

    // Load the full 4x4 footprint.
    FFX_UNROLL
    for (FfxInt32 iRow = 0; iRow < 4; iRow++)
    {
        FFX_UNROLL
        for (FfxInt32 iCol = 0; iCol < 4; iCol++)
        {
            FfxInt32 iTap = iCol + (iRow << 2);

            FfxInt32x2 iGridPos = FfxInt32x2(bMirrorCols ? (3 - iCol) : iCol, bMirrorRows ? (3 - iRow) : iRow);
            FfxInt32x2 iTapPos  = FfxInt32x2(iLrBaseTexel) + iTopLeft + iGridPos;

            const FfxInt32x2 iLoadPos = ClampLoad(iTapPos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));

            fTaps[iTap] = LoadPreparedInputColor(FfxInt32x2(iLoadPos));
        }
    }

    RectificationBoxReset(rectBox, fTaps[0]);

    FfxFloat32x3 fAccumColor  = FfxFloat32x3(0.f, 0.f, 0.f);
    FfxFloat32   fAccumWeight = FfxFloat32(0.f);
    FfxFloat32x2 fBaseOffset  = FfxFloat32x2(fBaseTexelPos - fLrPixelCenter);

    // Accumulate colour, weight and box statistics over rows/columns 0..2 of the footprint.
    FFX_UNROLL
    for (FfxInt32 iRow = 0; iRow < 3; iRow++)
    {
        FFX_UNROLL
        for (FfxInt32 iCol = 0; iCol < 3; iCol++)
        {
            FfxInt32 iTap = iCol + (iRow << 2);

            const FfxInt32x2   iGridPos   = FfxInt32x2(bMirrorCols ? (3 - iCol) : iCol, bMirrorRows ? (3 - iRow) : iRow);
            const FfxFloat32x2 fGridPos   = fTopLeft + FfxFloat32x2(iGridPos);
            FfxFloat32x2       fTapOffset = fBaseOffset + fGridPos;

            FfxInt32x2 iTapPos = FfxInt32x2(iLrBaseTexel) + FfxInt32x2(iTopLeft) + iGridPos;

            // Off-screen taps get a zero kernel weight.
            FfxFloat32 fTapWeight = FfxFloat32(IsOnScreen(FfxInt32x2(iTapPos), FfxInt32x2(RenderSize()))) * GetUpsampleLanczosWeight(fTapOffset, fKernelWeight);

            // Box weight: (1 - saturate(distance^2 / 3))^2.
            const FfxFloat32 fTapDistSq = dot(fTapOffset, fTapOffset);
            FfxFloat32 fBoxWeight = FfxFloat32(1) - ffxSaturate(fTapDistSq / FfxFloat32(3));
            fBoxWeight *= fBoxWeight;
            RectificationBoxAddSample(rectBox, fTaps[iTap], fBoxWeight);

            fAccumWeight += fTapWeight;
            fAccumColor += fTapWeight * fTaps[iTap];
        }
    }

    // Normalize before deringing so the colour is comparable with the box bounds;
    // a near-zero total weight leaves the sum undivided.
    fAccumColor = fAccumColor / (abs(fAccumWeight) > FSR2_EPSILON ? fAccumWeight : FfxFloat32(1.f));

    RectificationBoxComputeVarianceBoxData(rectBox);
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
    RectificationBoxDataMin16 boxData = RectificationBoxGetData(rectBox);
    clippingBox.aabbMax   = boxData.aabbMax;
    clippingBox.aabbMin   = boxData.aabbMin;
    clippingBox.boxCenter = boxData.boxCenter;
    clippingBox.boxVec    = boxData.boxVec;
#else
    RectificationBoxData boxData = RectificationBoxGetData(rectBox);
    clippingBox = boxData;
#endif

    Deringing(boxData, fAccumColor);

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
    clippingBox.aabbMax   = boxData.aabbMax;
    clippingBox.aabbMin   = boxData.aabbMin;
    clippingBox.boxCenter = boxData.boxCenter;
    clippingBox.boxVec    = boxData.boxVec;
#endif

    if (any(FFX_LESS_THAN(fKernelWeight, FfxFloat32x2(1, 1))))
    {
        fAccumWeight = FfxFloat32(averageLanczosWeightPerFrame);
    }
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

#if FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT
    return FfxFloat32x4(fAccumColor, ffxMax(FfxFloat32(FSR2_EPSILON), fAccumWeight));
#else
    return FfxFloat32x4(fAccumColor, ffxMax(FfxFloat32(0), fAccumWeight));
#endif
}
||||
|
|
||||
|
#endif //!defined( FFX_FSR2_UPSAMPLE_H ) |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 1ff3a385cfe07db4387e4d7b457238f8 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
@ -0,0 +1,936 @@ |
|||||
|
// This file is part of the FidelityFX SDK. |
||||
|
// |
||||
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. |
||||
|
// |
||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
|
// of this software and associated documentation files (the "Software"), to deal |
||||
|
// in the Software without restriction, including without limitation the rights |
||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
|
// copies of the Software, and to permit persons to whom the Software is |
||||
|
// furnished to do so, subject to the following conditions: |
||||
|
// The above copyright notice and this permission notice shall be included in |
||||
|
// all copies or substantial portions of the Software. |
||||
|
// |
||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
||||
|
// THE SOFTWARE. |
||||
|
|
||||
|
#ifdef FFX_CPU |
||||
|
// Derives SPD dispatch parameters from a rectangle.
// The rectangle is mapped onto a grid with a spacing of 64: workGroupOffset receives the grid
// index of its top-left corner, dispatchThreadGroupCountXY the number of grid cells it touches
// per axis, numWorkGroupsAndMips[0] the product of those two counts, and numWorkGroupsAndMips[1]
// the mip count. A non-negative `mips` is used as-is; any negative value selects
// min(floor(log2(max(width, height))), 12).
// NOTE(review): the first three arguments are written element-wise; this presumably relies on
// the CPU-side vector types being arrays so the writes reach the caller - confirm.
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,  // CPU side: dispatch thread group count xy
                         FfxUInt32x2 workGroupOffset,             // GPU side: pass in as constant
                         FfxUInt32x2 numWorkGroupsAndMips,        // GPU side: pass in as constant
                         FfxUInt32x4 rectInfo,                    // left, top, width, height
                         FfxInt32    mips)                        // optional: if -1, calculate based on rect width and height
{
    const FfxUInt32 rectLeft   = rectInfo[0];
    const FfxUInt32 rectTop    = rectInfo[1];
    const FfxUInt32 rectWidth  = rectInfo[2];
    const FfxUInt32 rectHeight = rectInfo[3];

    // First grid cell touched by the rectangle on each axis.
    workGroupOffset[0] = rectLeft / 64;
    workGroupOffset[1] = rectTop / 64;

    // Last grid cell touched by the rectangle on each axis (inclusive).
    const FfxUInt32 lastCellX = (rectLeft + rectWidth - 1) / 64;
    const FfxUInt32 lastCellY = (rectTop + rectHeight - 1) / 64;

    dispatchThreadGroupCountXY[0] = lastCellX + 1 - workGroupOffset[0];
    dispatchThreadGroupCountXY[1] = lastCellY + 1 - workGroupOffset[1];

    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);

    if (mips < 0)
    {
        // Derive the mip count from the larger rectangle dimension, capped at 12.
        const FfxUInt32  largestSide = ffxMax(rectWidth, rectHeight);
        const FfxFloat32 mipEstimate = floor(log2(FfxFloat32(largestSide)));
        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(mipEstimate, FfxFloat32(12))));
    }
    else
    {
        numWorkGroupsAndMips[1] = FfxUInt32(mips);
    }
}
||||
|
|
||||
|
// Convenience overload: forwards to the five-argument SpdSetup with `mips` set
// to -1, so the mip count is derived from the rectangle's width and height.
FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,  // CPU side: dispatch thread group count xy
                         FfxUInt32x2 workGroupOffset,             // GPU side: pass in as constant
                         FfxUInt32x2 numWorkGroupsAndMips,        // GPU side: pass in as constant
                         FfxUInt32x4 rectInfo)                    // left, top, width, height
{
    const FfxInt32 deriveMipsFromRect = -1;
    SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, deriveMipsFromRect);
}
||||
|
#endif // #ifdef FFX_CPU |
||||
|
|
||||
|
|
||||
|
//============================================================================================================================== |
||||
|
// NON-PACKED VERSION |
||||
|
//============================================================================================================================== |
||||
|
#ifdef FFX_GPU |
||||
|
#ifdef SPD_PACKED_ONLY
// Placeholder callbacks for builds that only use the packed (half precision)
// path. The non-packed functions below still reference these names, so empty
// definitions are supplied to avoid compiler errors. Loads return zero and
// stores do nothing.

// Placeholder source-image load: always returns zero.
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}

// Placeholder load: always returns zero.
FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}

// Placeholder store: intentionally does nothing.
void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
}

// Placeholder intermediate load: always returns zero.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}

// Placeholder intermediate store: intentionally does nothing.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
}

// Placeholder 4-to-1 reduction: ignores its inputs and returns zero.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
}
#endif  // #ifdef SPD_PACKED_ONLY
||||
|
|
||||
|
//_____________________________________________________________/\_______________________________________________________________ |
||||
|
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) |
||||
|
#extension GL_KHR_shader_subgroup_quad:require |
||||
|
#endif |
||||
|
|
||||
|
// Issues the workgroup barrier of the active shading language:
// barrier() for GLSL, GroupMemoryBarrierWithGroupSync() for HLSL.
// Compiles to an empty function when neither FFX_GLSL nor FFX_HLSL is defined.
void SpdWorkgroupShuffleBarrier()
{
#if defined(FFX_GLSL)
    barrier();
#endif
#if defined(FFX_HLSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
||||
|
|
||||
|
// Decides whether the calling workgroup should stop; only the last active
// workgroup is meant to proceed.
//
// Invocation 0 bumps the global atomic counter through the user-supplied
// SpdIncreaseAtomicCounter(slice). After a workgroup barrier, every invocation
// compares SpdGetAtomicCounter() with (numWorkGroups - 1).
//
// Returns true when the workgroup must exit, false when it should continue.
bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
{
    // global atomic counter: a single invocation per workgroup increments it
    if (localInvocationIndex == 0)
    {
        SpdIncreaseAtomicCounter(slice);
    }

    SpdWorkgroupShuffleBarrier();

    FfxUInt32 lastTicket = numWorkGroups - 1;
    return SpdGetAtomicCounter() != lastTicket;
}
||||
|
|
||||
|
// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
//
// Reduces the values held by the four lanes of a quad with SpdReduce4.
//   GLSL + wave ops: the partner values come from the subgroup quad swaps.
//   HLSL + wave ops: the partner values are read from lanes 1..3 of the quad
//                    that contains the current lane (requires SM6.0).
//   Otherwise:       `v` is returned unchanged.
FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    FfxFloat32x4 horizontal = subgroupQuadSwapHorizontal(v);
    FfxFloat32x4 vertical   = subgroupQuadSwapVertical(v);
    FfxFloat32x4 diagonal   = subgroupQuadSwapDiagonal(v);
    return SpdReduce4(v, horizontal, vertical, diagonal);

#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)

    // requires SM6.0
    FfxUInt32    quadBase = WaveGetLaneIndex() & (~0x3);  // first lane of this quad
    FfxFloat32x4 lane1    = WaveReadLaneAt(v, quadBase | 1);
    FfxFloat32x4 lane2    = WaveReadLaneAt(v, quadBase | 2);
    FfxFloat32x4 lane3    = WaveReadLaneAt(v, quadBase | 3);
    return SpdReduce4(v, lane1, lane2, lane3);

    // If SM6.0 is not available, the AMD shader intrinsics can be used instead
    // (works for DX11). They ship with the AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // Build the three partner values per component (x, y, z, w) with
    //   v1.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    //   v2.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    //   v3.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    // and return SpdReduce4(v, v1, v2, v3).

#endif
    return v;
}
||||
|
|
||||
|
// Loads four values through SpdLoadIntermediate at the coordinates i0..i3 and
// combines them with SpdReduce4, in that order.
FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    FfxFloat32x4 a = SpdLoadIntermediate(i0.x, i0.y);
    FfxFloat32x4 b = SpdLoadIntermediate(i1.x, i1.y);
    FfxFloat32x4 c = SpdLoadIntermediate(i2.x, i2.y);
    FfxFloat32x4 d = SpdLoadIntermediate(i3.x, i3.y);

    return SpdReduce4(a, b, c, d);
}
||||
|
|
||||
|
// Loads four values through SpdLoad at the coordinates i0..i3 of `slice` and
// combines them with SpdReduce4, in that order.
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat32x4 a = SpdLoad(FfxInt32x2(i0), slice);
    FfxFloat32x4 b = SpdLoad(FfxInt32x2(i1), slice);
    FfxFloat32x4 c = SpdLoad(FfxInt32x2(i2), slice);
    FfxFloat32x4 d = SpdLoad(FfxInt32x2(i3), slice);

    return SpdReduce4(a, b, c, d);
}
||||
|
|
||||
|
// Reduces the 2x2 footprint whose corner is `base`: the coordinates are taken
// at the offsets (0,0), (0,1), (1,0), (1,1), in that order, and handed to the
// five-argument SpdReduceLoad4.
FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));

    return SpdReduceLoad4(p00, p01, p10, p11, slice);
}
||||
|
|
||||
|
// Loads four values through SpdLoadSourceImage at the coordinates i0..i3 of
// `slice` and combines them with SpdReduce4, in that order.
FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat32x4 a = SpdLoadSourceImage(FfxInt32x2(i0), slice);
    FfxFloat32x4 b = SpdLoadSourceImage(FfxInt32x2(i1), slice);
    FfxFloat32x4 c = SpdLoadSourceImage(FfxInt32x2(i2), slice);
    FfxFloat32x4 d = SpdLoadSourceImage(FfxInt32x2(i3), slice);

    return SpdReduce4(a, b, c, d);
}
||||
|
|
||||
|
// Produces one reduced value for the 2x2 source footprint at `base`.
//   SPD_LINEAR_SAMPLER defined: a single SpdLoadSourceImage call at `base`.
//   Otherwise: four loads at the offsets (0,0), (0,1), (1,0), (1,1) reduced
//              through SpdReduceLoadSourceImage4.
FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
    return SpdLoadSourceImage(FfxInt32x2(base), slice);
#else
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));

    return SpdReduceLoadSourceImage4(p00, p01, p10, p11, slice);
#endif
}
||||
|
|
||||
|
// Wave-operation path for the first two output levels of a 64x64 source tile.
//
// Each invocation reduces four source footprints (offsets 0 / 32 in x and y),
// storing the results with mip argument 0 at offsets 0 / 16 inside the
// workgroup's 32x32 output area. When `mip` > 1 the four values are reduced
// across the quad with SpdReduceQuad; every fourth invocation then stores them
// with mip argument 1 and also hands them to SpdStoreIntermediate.
void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxInt32x2 srcOrigin  = FfxInt32x2(workGroupID.xy * 64);
    FfxInt32x2 mip0Origin = FfxInt32x2(workGroupID.xy * 32);

    FfxFloat32x4 v[4];

    // top-left quadrant
    v[0] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2, y * 2), slice);
    SpdStore(mip0Origin + FfxInt32x2(x, y), v[0], 0, slice);

    // top-right quadrant
    v[1] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2), slice);
    SpdStore(mip0Origin + FfxInt32x2(x + 16, y), v[1], 0, slice);

    // bottom-left quadrant
    v[2] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2, y * 2 + 32), slice);
    SpdStore(mip0Origin + FfxInt32x2(x, y + 16), v[2], 0, slice);

    // bottom-right quadrant
    v[3] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2 + 32), slice);
    SpdStore(mip0Origin + FfxInt32x2(x + 16, y + 16), v[3], 0, slice);

    if (mip <= 1)
    {
        return;
    }

    v[0] = SpdReduceQuad(v[0]);
    v[1] = SpdReduceQuad(v[1]);
    v[2] = SpdReduceQuad(v[2]);
    v[3] = SpdReduceQuad(v[3]);

    // Only one invocation out of every four writes the reduced results.
    if ((localInvocationIndex % 4) == 0)
    {
        FfxInt32x2 mip1Origin = FfxInt32x2(workGroupID.xy * 16);
        FfxUInt32  halfX      = x / 2;
        FfxUInt32  halfY      = y / 2;

        SpdStore(mip1Origin + FfxInt32x2(halfX, halfY), v[0], 1, slice);
        SpdStoreIntermediate(halfX, halfY, v[0]);

        SpdStore(mip1Origin + FfxInt32x2(halfX + 8, halfY), v[1], 1, slice);
        SpdStoreIntermediate(halfX + 8, halfY, v[1]);

        SpdStore(mip1Origin + FfxInt32x2(halfX, halfY + 8), v[2], 1, slice);
        SpdStoreIntermediate(halfX, halfY + 8, v[2]);

        SpdStore(mip1Origin + FfxInt32x2(halfX + 8, halfY + 8), v[3], 1, slice);
        SpdStoreIntermediate(halfX + 8, halfY + 8, v[3]);
    }
}
||||
|
|
||||
|
// Path without wave operations for the first two output levels of a 64x64
// source tile.
//
// The first stage matches the intrinsics path: four source footprints are
// reduced and stored with mip argument 0. When `mip` > 1, each of the four
// values in turn is exchanged through SpdStoreIntermediate /
// SpdReduceIntermediate, with a workgroup barrier before and after the reads;
// invocations with index < 64 reduce a 2x2 neighbourhood and store it with mip
// argument 1. Finally those invocations write their four results back through
// SpdStoreIntermediate at offsets 0 / 8.
void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
    FfxInt32x2 srcOrigin  = FfxInt32x2(workGroupID.xy * 64);
    FfxInt32x2 mip0Origin = FfxInt32x2(workGroupID.xy * 32);

    FfxFloat32x4 v[4];

    // top-left quadrant
    v[0] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2, y * 2), slice);
    SpdStore(mip0Origin + FfxInt32x2(x, y), v[0], 0, slice);

    // top-right quadrant
    v[1] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2), slice);
    SpdStore(mip0Origin + FfxInt32x2(x + 16, y), v[1], 0, slice);

    // bottom-left quadrant
    v[2] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2, y * 2 + 32), slice);
    SpdStore(mip0Origin + FfxInt32x2(x, y + 16), v[2], 0, slice);

    // bottom-right quadrant
    v[3] = SpdReduceLoadSourceImage(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2 + 32), slice);
    SpdStore(mip0Origin + FfxInt32x2(x + 16, y + 16), v[3], 0, slice);

    if (mip <= 1)
    {
        return;
    }

    for (FfxUInt32 quadrant = 0; quadrant < 4; quadrant++)
    {
        SpdStoreIntermediate(x, y, v[quadrant]);
        SpdWorkgroupShuffleBarrier();

        if (localInvocationIndex < 64)
        {
            FfxUInt32 left = x * 2;
            FfxUInt32 top  = y * 2;

            v[quadrant] = SpdReduceIntermediate(FfxUInt32x2(left + 0, top + 0),
                                                FfxUInt32x2(left + 1, top + 0),
                                                FfxUInt32x2(left + 0, top + 1),
                                                FfxUInt32x2(left + 1, top + 1));
            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (quadrant % 2) * 8, y + (quadrant / 2) * 8), v[quadrant], 1, slice);
        }

        // Second barrier: the reads above must finish before the next
        // iteration overwrites the intermediate values.
        SpdWorkgroupShuffleBarrier();
    }

    if (localInvocationIndex < 64)
    {
        SpdStoreIntermediate(x + 0, y + 0, v[0]);
        SpdStoreIntermediate(x + 8, y + 0, v[1]);
        SpdStoreIntermediate(x + 0, y + 8, v[2]);
        SpdStoreIntermediate(x + 8, y + 8, v[3]);
    }
}
||||
|
|
||||
|
// Selects the implementation for the first two output levels at compile time:
// the LDS variant when SPD_NO_WAVE_OPERATIONS is defined, the wave-intrinsics
// variant otherwise. All arguments are forwarded unchanged.
void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
#else
    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
#endif
}
||||
|
|
||||
|
|
||||
|
// First of the four reduction steps run by SpdDownsampleNextFour.
//
// Without wave operations: invocations with index < 64 reduce a 2x2 block of
// intermediate values, store it at `mip`, and write it back to the intermediate
// storage at a staggered position.
// With wave operations: every invocation loads its intermediate value, reduces
// it across the quad, and every fourth invocation stores the result.
void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 64)
    {
        FfxUInt32 left = x * 2;
        FfxUInt32 top  = y * 2;

        FfxFloat32x4 reduced = SpdReduceIntermediate(FfxUInt32x2(left + 0, top + 0),
                                                     FfxUInt32x2(left + 1, top + 0),
                                                     FfxUInt32x2(left + 0, top + 1),
                                                     FfxUInt32x2(left + 1, top + 1));
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), reduced, mip, slice);

        // store to LDS, try to reduce bank conflicts
        // (x marks a written slot; odd result rows are shifted by one column)
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // ...
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        SpdStoreIntermediate(left + y % 2, top, reduced);
    }
#else
    FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(x, y));

    // quad index 0 stores result
    if (localInvocationIndex % 4 == 0)
    {
        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
        SpdStoreIntermediate(x + (y / 2) % 2, y, reduced);
    }
#endif
}
||||
|
|
||||
|
// Second of the four reduction steps run by SpdDownsampleNextFour. Reads the
// staggered intermediate layout written by the previous step.
//
// Without wave operations: invocations with index < 16 reduce four intermediate
// values, store the result at `mip`, and write it back to intermediate storage.
// With wave operations: invocations with index < 64 load one value each, reduce
// across the quad, and every fourth invocation stores the result.
void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 16)
    {
        FfxUInt32 col = x * 4;
        FfxUInt32 row = y * 4;

        // Inputs sit in this pattern (second input row shifted by one column):
        // x 0 x 0
        // 0 0 0 0
        // 0 x 0 x
        // 0 0 0 0
        FfxFloat32x4 reduced = SpdReduceIntermediate(FfxUInt32x2(col + 0 + 0, row + 0),
                                                     FfxUInt32x2(col + 2 + 0, row + 0),
                                                     FfxUInt32x2(col + 0 + 1, row + 2),
                                                     FfxUInt32x2(col + 2 + 1, row + 2));
        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), reduced, mip, slice);

        // store to LDS (each result row is shifted by its row index)
        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        // ...
        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        // ...
        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        // ...
        SpdStoreIntermediate(col + y, row, reduced);
    }
#else
    if (localInvocationIndex < 64)
    {
        FfxUInt32 row = y * 2;

        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(x * 2 + y % 2, row));

        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediate(x * 2 + y / 2, row, reduced);
        }
    }
#endif
}
||||
|
|
||||
|
// Third of the four reduction steps run by SpdDownsampleNextFour.
//
// Without wave operations: invocations with index < 4 reduce four intermediate
// values, store the result at `mip`, and write it into row 0 of the
// intermediate storage.
// With wave operations: invocations with index < 16 load one value each, reduce
// across the quad, and every fourth invocation stores the result.
void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 4)
    {
        FfxUInt32 row = y * 8;

        // Inputs sit in this pattern:
        // x 0 0 0 x 0 0 0
        // ...
        // 0 x 0 0 0 x 0 0
        FfxFloat32x4 reduced = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, row + 0),
                                                     FfxUInt32x2(x * 8 + 4 + 0 + y * 2, row + 0),
                                                     FfxUInt32x2(x * 8 + 0 + 1 + y * 2, row + 4),
                                                     FfxUInt32x2(x * 8 + 4 + 1 + y * 2, row + 4));
        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), reduced, mip, slice);

        // store to LDS: the four results are packed into the first row
        // x x x x 0 ...
        // 0 ...
        SpdStoreIntermediate(x + y * 2, 0, reduced);
    }
#else
    if (localInvocationIndex < 16)
    {
        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(x * 4 + y, y * 4));

        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
            SpdStoreIntermediate(x / 2 + y, 0, reduced);
        }
    }
#endif
}
||||
|
|
||||
|
// Last of the four reduction steps run by SpdDownsampleNextFour: combines the
// four values stored in row 0 of the intermediate storage into a single value
// and stores it at coordinate `workGroupID` of level `mip`.
//
// Without wave operations: invocation 0 reduces the four values itself.
// With wave operations: invocations 0..3 load one value each, reduce across the
// quad, and the invocation with index % 4 == 0 stores the result.
void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 1)
    {
        // Inputs:
        // x x x x 0 ...
        // 0 ...
        FfxFloat32x4 reduced = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
        SpdStore(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
    }
#else
    if (localInvocationIndex < 4)
    {
        FfxFloat32x4 reduced = SpdReduceQuad(SpdLoadIntermediate(localInvocationIndex, 0));

        // quad index 0 stores result
        if (localInvocationIndex % 4 == 0)
        {
            SpdStore(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
        }
    }
#endif
}
||||
|
|
||||
|
// Produces levels 6 and 7 from values read back through SpdLoad.
//
// Each invocation reduces four 2x2 footprints of a 4x4 block (via the
// two-argument SpdReduceLoad4) and stores each result with mip argument 6.
// When `mips` > 7 the four results are reduced again, stored with mip
// argument 7, and handed to SpdStoreIntermediate.
void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    // top-left 2x2
    FfxInt32x2   srcTopLeft = FfxInt32x2(x * 4 + 0, y * 4 + 0);
    FfxInt32x2   dstTopLeft = FfxInt32x2(x * 2 + 0, y * 2 + 0);
    FfxFloat32x4 v0         = SpdReduceLoad4(srcTopLeft, slice);
    SpdStore(dstTopLeft, v0, 6, slice);

    // top-right 2x2
    FfxInt32x2   srcTopRight = FfxInt32x2(x * 4 + 2, y * 4 + 0);
    FfxInt32x2   dstTopRight = FfxInt32x2(x * 2 + 1, y * 2 + 0);
    FfxFloat32x4 v1          = SpdReduceLoad4(srcTopRight, slice);
    SpdStore(dstTopRight, v1, 6, slice);

    // bottom-left 2x2
    FfxInt32x2   srcBottomLeft = FfxInt32x2(x * 4 + 0, y * 4 + 2);
    FfxInt32x2   dstBottomLeft = FfxInt32x2(x * 2 + 0, y * 2 + 1);
    FfxFloat32x4 v2            = SpdReduceLoad4(srcBottomLeft, slice);
    SpdStore(dstBottomLeft, v2, 6, slice);

    // bottom-right 2x2
    FfxInt32x2   srcBottomRight = FfxInt32x2(x * 4 + 2, y * 4 + 2);
    FfxInt32x2   dstBottomRight = FfxInt32x2(x * 2 + 1, y * 2 + 1);
    FfxFloat32x4 v3             = SpdReduceLoad4(srcBottomRight, slice);
    SpdStore(dstBottomRight, v3, 6, slice);

    if (mips <= 7)
    {
        return;
    }

    // no barrier needed, working on values only from the same thread
    FfxFloat32x4 combined = SpdReduce4(v0, v1, v2, v3);
    SpdStore(FfxInt32x2(x, y), combined, 7, slice);
    SpdStoreIntermediate(x, y, combined);
}
||||
|
|
||||
|
// Runs up to four consecutive reduction steps, writing levels baseMip ..
// baseMip + 3. Each step is preceded by a workgroup barrier, and the chain
// stops as soon as the next level is not below `mips`.
void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    // level baseMip
    if (mips <= baseMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);

    // level baseMip + 1
    if (mips <= baseMip + 1)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);

    // level baseMip + 2
    if (mips <= baseMip + 2)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);

    // level baseMip + 3
    if (mips <= baseMip + 3)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice);
}
||||
|
|
||||
|
// Entry point of the non-packed downsampler for one workgroup.
//
// Derives the invocation's (x, y) position from localInvocationIndex, runs the
// per-tile reduction (SpdDownsampleMips_0_1 followed by four more levels
// starting at level 2), and, when `mips` > 6, lets only the workgroup that
// SpdExitWorkgroup does not reject continue with levels 6, 7 and the four
// levels starting at 8. That workgroup resets the atomic counter first.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    // Position inside an 8x8 sub-tile plus the offset of that sub-tile.
    FfxUInt32x2 localXY   = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32   tileIndex = localInvocationIndex >> 6;
    FfxUInt32   x         = localXY.x + 8 * (tileIndex % 2);
    FfxUInt32   y         = localXY.y + 8 * (tileIndex >> 1);

    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);

    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    if (mips <= 6)
    {
        return;
    }

    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
    {
        return;
    }

    SpdResetAtomicCounter(slice);

    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
    SpdDownsampleMips_6_7(x, y, mips, slice);

    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
||||
|
|
||||
|
// Overload for sub-rectangle dispatches: adds `workGroupOffset` to the
// workgroup ID and forwards to the five-argument SpdDownsample.
void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
    FfxUInt32x2 shiftedGroupID = workGroupID + workGroupOffset;
    SpdDownsample(shiftedGroupID, localInvocationIndex, mips, numWorkGroups, slice);
}
||||
|
|
||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// |
||||
|
|
||||
|
//============================================================================================================================== |
||||
|
// PACKED VERSION |
||||
|
//============================================================================================================================== |
||||
|
|
||||
|
#if FFX_HALF |
||||
|
|
||||
|
#ifdef FFX_GLSL |
||||
|
#extension GL_EXT_shader_subgroup_extended_types_float16:require |
||||
|
#endif |
||||
|
|
||||
|
// Half-precision counterpart of SpdReduceQuad: reduces the values held by the
// four lanes of a quad with SpdReduce4H.
//   GLSL + wave ops: the partner values come from the subgroup quad swaps.
//   HLSL + wave ops: the partner values are read from lanes 1..3 of the quad
//                    that contains the current lane (requires SM6.0).
//   Otherwise:       `v` is returned unchanged, matching the fallback of the
//                    non-packed SpdReduceQuad (this path previously returned
//                    zero, which silently discarded the input).
FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
{
#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
    FfxFloat16x4 v0 = v;
    FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
    FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
    FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
    return SpdReduce4H(v0, v1, v2, v3);
#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
    // requires SM6.0
    FfxUInt32    quad = WaveGetLaneIndex() & (~0x3);  // first lane of this quad
    FfxFloat16x4 v0   = v;
    FfxFloat16x4 v1   = WaveReadLaneAt(v, quad | 1);
    FfxFloat16x4 v2   = WaveReadLaneAt(v, quad | 2);
    FfxFloat16x4 v3   = WaveReadLaneAt(v, quad | 3);
    return SpdReduce4H(v0, v1, v2, v3);

    // If SM6.0 is not available, the AMD shader intrinsics can be used instead
    // (works for DX11). They ship with the AMD GPU Services (AGS) library:
    // https://gpuopen.com/amd-gpu-services-ags-library/
    // Build the three partner values per component (x, y, z, w) with
    //   v1.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
    //   v2.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
    //   v3.c = AmdExtD3DShaderIntrinsics_SwizzleF(v.c, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
    // and return SpdReduce4H(v0, v1, v2, v3).
#endif
    // No wave operations available: pass the input through unchanged.
    return v;
}
||||
|
|
||||
|
// Loads four half-precision values through SpdLoadIntermediateH at the
// coordinates i0..i3 and combines them with SpdReduce4H, in that order.
FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
{
    FfxFloat16x4 a = SpdLoadIntermediateH(i0.x, i0.y);
    FfxFloat16x4 b = SpdLoadIntermediateH(i1.x, i1.y);
    FfxFloat16x4 c = SpdLoadIntermediateH(i2.x, i2.y);
    FfxFloat16x4 d = SpdLoadIntermediateH(i3.x, i3.y);

    return SpdReduce4H(a, b, c, d);
}
||||
|
|
||||
|
// Loads four half-precision values through SpdLoadH at the coordinates i0..i3
// of `slice` and combines them with SpdReduce4H, in that order.
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 a = SpdLoadH(FfxInt32x2(i0), slice);
    FfxFloat16x4 b = SpdLoadH(FfxInt32x2(i1), slice);
    FfxFloat16x4 c = SpdLoadH(FfxInt32x2(i2), slice);
    FfxFloat16x4 d = SpdLoadH(FfxInt32x2(i3), slice);

    return SpdReduce4H(a, b, c, d);
}
||||
|
|
||||
|
// Reduces the 2x2 footprint whose corner is `base`: the coordinates are taken
// at the offsets (0,0), (0,1), (1,0), (1,1), in that order, and handed to the
// five-argument SpdReduceLoad4H.
FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
{
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));

    return SpdReduceLoad4H(p00, p01, p10, p11, slice);
}
||||
|
|
||||
|
// Loads four half-precision values through SpdLoadSourceImageH at the
// coordinates i0..i3 of `slice` and combines them with SpdReduce4H, in that
// order.
FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
{
    FfxFloat16x4 a = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
    FfxFloat16x4 b = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
    FfxFloat16x4 c = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
    FfxFloat16x4 d = SpdLoadSourceImageH(FfxInt32x2(i3), slice);

    return SpdReduce4H(a, b, c, d);
}
||||
|
|
||||
|
// Produces one reduced half-precision value for the 2x2 source footprint at
// `base`.
//   SPD_LINEAR_SAMPLER defined: a single SpdLoadSourceImageH call at `base`.
//   Otherwise: four loads at the offsets (0,0), (0,1), (1,0), (1,1) reduced
//              through SpdReduceLoadSourceImage4H.
FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
{
#ifdef SPD_LINEAR_SAMPLER
    return SpdLoadSourceImageH(FfxInt32x2(base), slice);
#else
    FfxUInt32x2 p00 = FfxUInt32x2(base + FfxUInt32x2(0, 0));
    FfxUInt32x2 p01 = FfxUInt32x2(base + FfxUInt32x2(0, 1));
    FfxUInt32x2 p10 = FfxUInt32x2(base + FfxUInt32x2(1, 0));
    FfxUInt32x2 p11 = FfxUInt32x2(base + FfxUInt32x2(1, 1));

    return SpdReduceLoadSourceImage4H(p00, p01, p10, p11, slice);
#endif
}
||||
|
|
||||
|
// Half-precision wave-operation path for the first two output levels of a
// 64x64 source tile.
//
// Each invocation reduces four source footprints (offsets 0 / 32 in x and y),
// storing the results with mip argument 0 at offsets 0 / 16 inside the
// workgroup's 32x32 output area. When `mips` > 1 the four values are reduced
// across the quad with SpdReduceQuadH; every fourth invocation then stores them
// with mip argument 1 and also hands them to SpdStoreIntermediateH.
void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxInt32x2 srcOrigin  = FfxInt32x2(workGroupID.xy * 64);
    FfxInt32x2 mip0Origin = FfxInt32x2(workGroupID.xy * 32);

    FfxFloat16x4 v[4];

    // top-left quadrant
    v[0] = SpdReduceLoadSourceImageH(srcOrigin + FfxInt32x2(x * 2, y * 2), slice);
    SpdStoreH(mip0Origin + FfxInt32x2(x, y), v[0], 0, slice);

    // top-right quadrant
    v[1] = SpdReduceLoadSourceImageH(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2), slice);
    SpdStoreH(mip0Origin + FfxInt32x2(x + 16, y), v[1], 0, slice);

    // bottom-left quadrant
    v[2] = SpdReduceLoadSourceImageH(srcOrigin + FfxInt32x2(x * 2, y * 2 + 32), slice);
    SpdStoreH(mip0Origin + FfxInt32x2(x, y + 16), v[2], 0, slice);

    // bottom-right quadrant
    v[3] = SpdReduceLoadSourceImageH(srcOrigin + FfxInt32x2(x * 2 + 32, y * 2 + 32), slice);
    SpdStoreH(mip0Origin + FfxInt32x2(x + 16, y + 16), v[3], 0, slice);

    if (mips <= 1)
    {
        return;
    }

    v[0] = SpdReduceQuadH(v[0]);
    v[1] = SpdReduceQuadH(v[1]);
    v[2] = SpdReduceQuadH(v[2]);
    v[3] = SpdReduceQuadH(v[3]);

    // Only one invocation out of every four writes the reduced results.
    if ((localInvocationIndex % 4) == 0)
    {
        FfxInt32x2 mip1Origin = FfxInt32x2(workGroupID.xy * 16);
        FfxUInt32  halfX      = x / 2;
        FfxUInt32  halfY      = y / 2;

        SpdStoreH(mip1Origin + FfxInt32x2(halfX, halfY), v[0], 1, slice);
        SpdStoreIntermediateH(halfX, halfY, v[0]);

        SpdStoreH(mip1Origin + FfxInt32x2(halfX + 8, halfY), v[1], 1, slice);
        SpdStoreIntermediateH(halfX + 8, halfY, v[1]);

        SpdStoreH(mip1Origin + FfxInt32x2(halfX, halfY + 8), v[2], 1, slice);
        SpdStoreIntermediateH(halfX, halfY + 8, v[2]);

        SpdStoreH(mip1Origin + FfxInt32x2(halfX + 8, halfY + 8), v[3], 1, slice);
        SpdStoreIntermediateH(halfX + 8, halfY + 8, v[3]);
    }
}
||||
|
|
||||
|
// Half-precision path without wave operations for the first two output levels
// of a 64x64 source tile.
//
// Four source footprints are reduced and stored with mip argument 0. When
// `mips` > 1, each of the four values in turn is exchanged through
// SpdStoreIntermediateH / SpdReduceIntermediateH, with a workgroup barrier
// before and after the reads; invocations with index < 64 reduce a 2x2
// neighbourhood and store it with mip argument 1. Finally those invocations
// write their four results back through SpdStoreIntermediateH at offsets 0 / 8.
//
// The loop index is unsigned, like in the non-packed SpdDownsampleMips_0_1_LDS,
// so the coordinate arithmetic no longer mixes signed and unsigned operands.
void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxFloat16x4 v[4];

    // top-left quadrant
    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
    v[0]           = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[0], 0, slice);

    // top-right quadrant
    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
    v[1] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[1], 0, slice);

    // bottom-left quadrant
    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
    v[2] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[2], 0, slice);

    // bottom-right quadrant
    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
    v[3] = SpdReduceLoadSourceImageH(tex, slice);
    SpdStoreH(pix, v[3], 0, slice);

    if (mips <= 1)
        return;

    for (FfxUInt32 i = 0; i < 4; i++)
    {
        SpdStoreIntermediateH(x, y, v[i]);
        SpdWorkgroupShuffleBarrier();
        if (localInvocationIndex < 64)
        {
            v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
        }
        // Second barrier: the reads above must finish before the next
        // iteration overwrites the intermediate values.
        SpdWorkgroupShuffleBarrier();
    }

    if (localInvocationIndex < 64)
    {
        SpdStoreIntermediateH(x + 0, y + 0, v[0]);
        SpdStoreIntermediateH(x + 8, y + 0, v[1]);
        SpdStoreIntermediateH(x + 0, y + 8, v[2]);
        SpdStoreIntermediateH(x + 8, y + 8, v[3]);
    }
}
||||
|
|
||||
|
// Selects the half-precision implementation for the first two output levels at
// compile time: the LDS variant when SPD_NO_WAVE_OPERATIONS is defined, the
// wave-intrinsics variant otherwise. All arguments are forwarded unchanged.
void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
#else
    SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
#endif
}
||||
|
|
||||
|
|
||||
|
// Half-precision counterpart of SpdDownsampleMip_2.
//
// Without wave operations: invocations with index < 64 reduce a 2x2 block of
// intermediate values, store it at `mip`, and write it back to the intermediate
// storage at a staggered position.
// With wave operations: every invocation loads its intermediate value, reduces
// it across the quad, and every fourth invocation stores the result.
void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#ifdef SPD_NO_WAVE_OPERATIONS
    if (localInvocationIndex < 64)
    {
        FfxUInt32 left = x * 2;
        FfxUInt32 top  = y * 2;

        FfxFloat16x4 reduced = SpdReduceIntermediateH(FfxUInt32x2(left + 0, top + 0),
                                                      FfxUInt32x2(left + 1, top + 0),
                                                      FfxUInt32x2(left + 0, top + 1),
                                                      FfxUInt32x2(left + 1, top + 1));
        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), reduced, mip, slice);

        // store to LDS, try to reduce bank conflicts
        // (x marks a written slot; odd result rows are shifted by one column)
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        // ...
        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
        SpdStoreIntermediateH(left + y % 2, top, reduced);
    }
#else
    FfxFloat16x4 reduced = SpdReduceQuadH(SpdLoadIntermediateH(x, y));

    // quad index 0 stores result
    if (localInvocationIndex % 4 == 0)
    {
        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), reduced, mip, slice);
        SpdStoreIntermediateH(x + (y / 2) % 2, y, reduced);
    }
#endif
}
||||
|
|
||||
|
// Second reduction step after SpdDownsampleMip_2H: combines four entries of
// intermediate storage, writes the result to mip `mip` of `slice` via
// SpdStoreH, and stores it back to intermediate storage.
//
// x, y                 - per-invocation coordinates supplied by the caller.
// workGroupID          - scaled by 4 to form the output base coordinate.
// localInvocationIndex - selects which invocations do work / store results.
// mip, slice           - forwarded to SpdStoreH.
void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    // Only the first 16 invocations participate in this pass.
    if (localInvocationIndex < 16)
    {
        // Source entries follow the staggered layout ('x' = entry read):
        // x 0 x 0
        // 0 0 0 0
        // 0 x 0 x
        // 0 0 0 0
        FfxUInt32x2 upperA = FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0);
        FfxUInt32x2 upperB = FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0);
        FfxUInt32x2 lowerA = FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2);
        FfxUInt32x2 lowerB = FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2);

        FfxFloat16x4 reduced = SpdReduceIntermediateH(upperA, upperB, lowerA, lowerB);
        SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), reduced, mip, slice);

        // Store to LDS; the column is offset by y, giving this layout:
        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
        // ...
        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
        // ...
        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
        // ...
        SpdStoreIntermediateH(x * 4 + y, y * 4, reduced);
    }
#else
    // Only the first 64 invocations participate in this pass.
    if (localInvocationIndex < 64)
    {
        FfxFloat16x4 quadValue = SpdLoadIntermediateH(x * 2 + y % 2, y * 2);
        quadValue = SpdReduceQuadH(quadValue);

        // Only quad index 0 (every fourth invocation) stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), quadValue, mip, slice);
            SpdStoreIntermediateH(x * 2 + y / 2, y * 2, quadValue);
        }
    }
#endif
}
||||
|
|
||||
|
// Third reduction step: combines four entries of intermediate storage,
// writes the result to mip `mip` of `slice` via SpdStoreH, and packs it into
// row 0 of intermediate storage.
//
// x, y                 - per-invocation coordinates supplied by the caller.
// workGroupID          - scaled by 2 to form the output base coordinate.
// localInvocationIndex - selects which invocations do work / store results.
// mip, slice           - forwarded to SpdStoreH.
void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    // Only the first 4 invocations participate in this pass.
    if (localInvocationIndex < 4)
    {
        // Source entries follow the offset layout ('x' = entry read):
        // x 0 0 0 x 0 0 0
        // ...
        // 0 x 0 0 0 x 0 0
        FfxUInt32x2 upperA = FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0);
        FfxUInt32x2 upperB = FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0);
        FfxUInt32x2 lowerA = FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4);
        FfxUInt32x2 lowerB = FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4);

        FfxFloat16x4 reduced = SpdReduceIntermediateH(upperA, upperB, lowerA, lowerB);
        SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), reduced, mip, slice);

        // Store to LDS, packed into the first entries of row 0:
        // x x x x 0 ...
        // 0 ...
        SpdStoreIntermediateH(x + y * 2, 0, reduced);
    }
#else
    // Only the first 16 invocations participate in this pass.
    if (localInvocationIndex < 16)
    {
        FfxFloat16x4 quadValue = SpdLoadIntermediateH(x * 4 + y, y * 4);
        quadValue = SpdReduceQuadH(quadValue);

        // Only quad index 0 (every fourth invocation) stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), quadValue, mip, slice);
            SpdStoreIntermediateH(x / 2 + y, 0, quadValue);
        }
    }
#endif
}
||||
|
|
||||
|
// Final reduction step of a four-level group: combines the first four
// entries of row 0 of intermediate storage into a single value and writes it
// to mip `mip` of `slice` at coordinate workGroupID.xy. Nothing is written
// back to intermediate storage.
//
// workGroupID          - used directly as the output coordinate.
// localInvocationIndex - selects which invocations do work / store results.
// mip, slice           - forwarded to SpdStoreH.
void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
{
#if defined(SPD_NO_WAVE_OPERATIONS)
    // A single invocation reduces the four packed entries.
    if (localInvocationIndex < 1)
    {
        // Source layout ('x' = entry read):
        // x x x x 0 ...
        // 0 ...
        FfxUInt32x2 entry0 = FfxUInt32x2(0, 0);
        FfxUInt32x2 entry1 = FfxUInt32x2(1, 0);
        FfxUInt32x2 entry2 = FfxUInt32x2(2, 0);
        FfxUInt32x2 entry3 = FfxUInt32x2(3, 0);

        FfxFloat16x4 reduced = SpdReduceIntermediateH(entry0, entry1, entry2, entry3);
        SpdStoreH(FfxInt32x2(workGroupID.xy), reduced, mip, slice);
    }
#else
    // The first four invocations each load one entry and reduce as a quad.
    if (localInvocationIndex < 4)
    {
        FfxFloat16x4 quadValue = SpdLoadIntermediateH(localInvocationIndex, 0);
        quadValue = SpdReduceQuadH(quadValue);

        // Only quad index 0 stores the result.
        if (localInvocationIndex % 4 == 0)
        {
            SpdStoreH(FfxInt32x2(workGroupID.xy), quadValue, mip, slice);
        }
    }
#endif
}
||||
|
|
||||
|
// Produces mip 6 and, when `mips` is at least 8, mip 7.
//
// Each invocation performs four SpdReduceLoad4H loads at (4x + {0,2},
// 4y + {0,2}) and stores each result to mip 6 at (2x + {0,1}, 2y + {0,1}).
// If more levels are requested, the four values are combined with
// SpdReduce4H and stored to mip 7 at (x, y) and to intermediate storage.
//
// x, y  - per-invocation coordinates supplied by the caller.
// mips  - number of mip levels requested; mip 7 is skipped when mips < 8.
// slice - forwarded to the load/store helpers.
void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
{
    // Top-left quadrant.
    FfxFloat16x4 topLeft = SpdReduceLoad4H(FfxInt32x2(x * 4 + 0, y * 4 + 0), slice);
    SpdStoreH(FfxInt32x2(x * 2 + 0, y * 2 + 0), topLeft, 6, slice);

    // Top-right quadrant.
    FfxFloat16x4 topRight = SpdReduceLoad4H(FfxInt32x2(x * 4 + 2, y * 4 + 0), slice);
    SpdStoreH(FfxInt32x2(x * 2 + 1, y * 2 + 0), topRight, 6, slice);

    // Bottom-left quadrant.
    FfxFloat16x4 bottomLeft = SpdReduceLoad4H(FfxInt32x2(x * 4 + 0, y * 4 + 2), slice);
    SpdStoreH(FfxInt32x2(x * 2 + 0, y * 2 + 1), bottomLeft, 6, slice);

    // Bottom-right quadrant.
    FfxFloat16x4 bottomRight = SpdReduceLoad4H(FfxInt32x2(x * 4 + 2, y * 4 + 2), slice);
    SpdStoreH(FfxInt32x2(x * 2 + 1, y * 2 + 1), bottomRight, 6, slice);

    // Mip 7 was not requested: done.
    if (mips < 8)
    {
        return;
    }

    // No barrier needed, working on values only from the same thread.
    FfxFloat16x4 combined = SpdReduce4H(topLeft, topRight, bottomLeft, bottomRight);
    SpdStoreH(FfxInt32x2(x, y), combined, 7, slice);
    SpdStoreIntermediateH(x, y, combined);
}
||||
|
|
||||
|
// Runs up to four consecutive reduction passes, producing mips `baseMip`
// through `baseMip + 3`. Each pass is preceded by a
// SpdWorkgroupShuffleBarrier() call, and the function returns as soon as the
// next level is not requested (mips <= that level's index).
//
// x, y, workGroupID, localInvocationIndex, slice - forwarded to the passes.
// baseMip - index of the first mip level written by this call.
// mips    - number of mip levels requested.
void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
{
    FfxUInt32 secondMip = baseMip + 1;
    FfxUInt32 thirdMip  = baseMip + 2;
    FfxUInt32 fourthMip = baseMip + 3;

    // Level baseMip.
    if (mips <= baseMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);

    // Level baseMip + 1.
    if (mips <= secondMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, secondMip, slice);

    // Level baseMip + 2.
    if (mips <= thirdMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, thirdMip, slice);

    // Level baseMip + 3.
    if (mips <= fourthMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_5H(workGroupID, localInvocationIndex, fourthMip, slice);
}
||||
|
|
||||
|
// Top-level half-precision downsample driver.
//
// Derives per-invocation (x, y) coordinates from localInvocationIndex, then
// runs mips 0-1 and the following group of four (starting at mip 2). If
// seven or more levels are requested and SpdExitWorkgroup() does not ask
// this workgroup to stop, it resets the atomic counter and continues with
// mips 6-7 and a second group of four (starting at mip 8).
//
// workGroupID          - workgroup coordinate used for the first six levels.
// localInvocationIndex - flat index of this invocation.
// mips                 - number of mip levels requested.
// numWorkGroups        - forwarded to SpdExitWorkgroup.
// slice                - forwarded to every pass.
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
{
    // Remap the low 6 bits of the index, then use bits 6 and 7+ to pick the
    // 8-wide column / row offset.
    FfxUInt32x2 remapped = ffxRemapForWaveReduction(localInvocationIndex % 64);
    FfxUInt32 columnOffset = 8 * ((localInvocationIndex >> 6) % 2);
    FfxUInt32 rowOffset = 8 * ((localInvocationIndex >> 7));
    FfxUInt32 x = remapped.x + columnOffset;
    FfxUInt32 y = remapped.y + rowOffset;

    SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);

    SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);

    // Fewer than seven levels requested: nothing beyond the first group.
    if (mips < 7)
    {
        return;
    }

    // Stop here when SpdExitWorkgroup says this workgroup should exit.
    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
    {
        return;
    }

    SpdResetAtomicCounter(slice);

    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
    SpdDownsampleMips_6_7H(x, y, mips, slice);

    SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
}
||||
|
|
||||
|
// Overload that shifts the workgroup coordinate by `workGroupOffset` before
// delegating to the five-argument SpdDownsampleH. All other arguments are
// forwarded unchanged.
void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
{
    FfxUInt32x2 shiftedWorkGroupID = workGroupID + workGroupOffset;
    SpdDownsampleH(shiftedWorkGroupID, localInvocationIndex, mips, numWorkGroups, slice);
}
||||
|
|
||||
|
#endif // #if FFX_HALF |
||||
|
#endif // #ifdef FFX_GPU |
||||
@ -0,0 +1,27 @@ |
|||||
|
fileFormatVersion: 2 |
||||
|
guid: 3ef69a900a925bb498651c10581e0979 |
||||
|
PluginImporter: |
||||
|
externalObjects: {} |
||||
|
serializedVersion: 2 |
||||
|
iconMap: {} |
||||
|
executionOrder: {} |
||||
|
defineConstraints: [] |
||||
|
isPreloaded: 0 |
||||
|
isOverridable: 0 |
||||
|
isExplicitlyReferenced: 0 |
||||
|
validateReferences: 1 |
||||
|
platformData: |
||||
|
- first: |
||||
|
Any: |
||||
|
second: |
||||
|
enabled: 1 |
||||
|
settings: {} |
||||
|
- first: |
||||
|
Editor: Editor |
||||
|
second: |
||||
|
enabled: 0 |
||||
|
settings: |
||||
|
DefaultValueInitialized: true |
||||
|
userData: |
||||
|
assetBundleName: |
||||
|
assetBundleVariant: |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue