You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
494 lines
23 KiB
494 lines
23 KiB
// This file is part of the FidelityFX SDK.
|
|
//
|
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
|
|
#ifndef FFX_FSR2_SAMPLE_H
|
|
#define FFX_FSR2_SAMPLE_H
|
|
|
|
// Compiler diagnostics silenced for this header (HLSL builds only).
#if defined(FFX_HLSL)
#pragma warning(disable: 4008) // potentially divide by zero
#endif // defined(FFX_HLSL)
|
|
|
|
// The 2x2 set of colors consumed by Bilinear().
// Fields are named fColorXY; the fetch macro in this file fills fColorXY from
// texel offset (+X, +Y) relative to the base sample position.
struct FetchedBilinearSamples
{
    FfxFloat32x4 fColor00, fColor10; // y offset +0
    FfxFloat32x4 fColor01, fColor11; // y offset +1
};
|
|
|
|
// The 4x4 set of colors consumed by the bicubic / Lanczos filters below.
// Fields are named fColorXY; the fetch macro in this file fills fColorXY from
// texel offset (X - 1, Y - 1) relative to the base sample position.
struct FetchedBicubicSamples
{
    FfxFloat32x4 fColor00, fColor10, fColor20, fColor30; // y offset -1
    FfxFloat32x4 fColor01, fColor11, fColor21, fColor31; // y offset +0
    FfxFloat32x4 fColor02, fColor12, fColor22, fColor32; // y offset +1
    FfxFloat32x4 fColor03, fColor13, fColor23, fColor33; // y offset +2
};
|
|
|
|
// 16-bit float variant of FetchedBicubicSamples (same field layout).
// When FFX_HALF is disabled the name is simply an alias of the 32-bit struct.
#if FFX_HALF
struct FetchedBicubicSamplesMin16
{
    FfxFloat16x4 fColor00, fColor10, fColor20, fColor30; // y offset -1
    FfxFloat16x4 fColor01, fColor11, fColor21, fColor31; // y offset +0
    FfxFloat16x4 fColor02, fColor12, fColor22, fColor32; // y offset +1
    FfxFloat16x4 fColor03, fColor13, fColor23, fColor33; // y offset +2
};
#else //FFX_HALF
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#endif //FFX_HALF
|
|
|
|
// Linear interpolation: returns A at t = 0 and A + (B - A) at t = 1.
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}
|
|
|
|
// Bilinear blend of a 2x2 sample set.
// fPxFrac.x weights between the X = 0 and X = 1 samples, fPxFrac.y between the
// Y = 0 and Y = 1 rows.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass on each row, then a single vertical pass.
    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    return Linear(fRow0, fRow1, fPxFrac.y);
}
|
|
|
|
// Cubic Catmull-Rom polynomial through four control values.
// Evaluates to B at t = 0 and to C at t = 1.
// SEE: ../Interpolation/CatmullRom.ipynb
FfxFloat32x4 CubicCatmullRom(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32x4 C, FfxFloat32x4 D, FfxFloat32 t)
{
    // Polynomial coefficients, highest power first.
    FfxFloat32x4 fCoeff3 = -A / 2.f + (3.f * B) / 2.f - (3.f * C) / 2.f + D / 2.f;
    FfxFloat32x4 fCoeff2 = A - (5.f * B) / 2.f + 2.f * C - D / 2.f;
    FfxFloat32x4 fCoeff1 = -A / 2.f + C / 2.f;
    FfxFloat32x4 fCoeff0 = B;

    // Powers of the interpolation parameter.
    FfxFloat32 fT2 = t * t;
    FfxFloat32 fT3 = t * t * t;

    return fCoeff3 * fT3 + fCoeff2 * fT2 + fCoeff1 * t + fCoeff0;
}
|
|
|
|
// Separable bicubic Catmull-Rom filter over a 4x4 sample set:
// each of the four rows is filtered along x with fPxFrac.x, then the four row
// results are filtered along y with fPxFrac.y.
FfxFloat32x4 BicubicCatmullRom(FetchedBicubicSamples BicubicSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = CubicCatmullRom(BicubicSamples.fColor00, BicubicSamples.fColor10, BicubicSamples.fColor20, BicubicSamples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = CubicCatmullRom(BicubicSamples.fColor01, BicubicSamples.fColor11, BicubicSamples.fColor21, BicubicSamples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = CubicCatmullRom(BicubicSamples.fColor02, BicubicSamples.fColor12, BicubicSamples.fColor22, BicubicSamples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = CubicCatmullRom(BicubicSamples.fColor03, BicubicSamples.fColor13, BicubicSamples.fColor23, BicubicSamples.fColor33, fPxFrac.x);

    return CubicCatmullRom(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
}
|
|
|
|
// Lanczos-2 kernel weight: [sin(PI x) / (PI x)] * [sin(PI x / 2) / (PI x / 2)].
// For |x| < FSR2_EPSILON the weight is returned as 1 instead of evaluating 0 / 0.
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

    if (abs(x) < FSR2_EPSILON)
    {
        return 1.f;
    }

    FfxFloat32 fPiX     = PI * x;
    FfxFloat32 fHalfPiX = 0.5f * PI * x;
    return (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload of the Lanczos-2 kernel weight.
// For |x| < FSR2_EPSILON the weight is returned as 1 instead of evaluating 0 / 0.
FfxFloat16 Lanczos2(FfxFloat16 x)
{
    const FFX_MIN16_F PI = FfxFloat16(3.141592653589793f); // TODO: share SDK constants

    if (abs(x) < FSR2_EPSILON)
    {
        return FfxFloat16(1.f);
    }

    return (sin(PI * x) / (PI * x)) * (sin(FfxFloat16(0.5f) * PI * x) / (FfxFloat16(0.5f) * PI * x));
}
#endif //FFX_HALF
|
|
|
|
// FSR1 polynomial approximation of the Lanczos-2 kernel.
// The argument is the SQUARED distance (x * x) and must be <= 4; no clamping
// is applied here. Evaluates to 1 at x2 = 0 and to 0 at x2 = 4.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    const FfxFloat32 fScale = 25.0f / 16.0f;

    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;

    return (fScale * fTermA * fTermA - (fScale - 1)) * (fTermB * fTermB);
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload of the FSR1 Lanczos-2 approximation.
// The argument is the SQUARED distance and is not clamped here.
FfxFloat16 Lanczos2ApproxSqNoClamp(FfxFloat16 x2)
{
    const FfxFloat16 fScale = FfxFloat16(25.0f / 16.0f);
    const FfxFloat16 fBias  = FfxFloat16(25.0f / 16.0f - 1);

    FfxFloat16 fTermA = FfxFloat16(2.0f / 5.0f) * x2 - FfxFloat16(1);
    FfxFloat16 fTermB = FfxFloat16(1.0f / 4.0f) * x2 - FfxFloat16(1);

    return (fScale * fTermA * fTermA - fBias) * (fTermB * fTermB);
}
#endif //FFX_HALF
|
|
|
|
// Lanczos-2 approximation on a squared distance, with the input limited to at
// most 4 before evaluation.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload: limits the squared distance to at most 4, then
// evaluates the approximation.
FfxFloat16 Lanczos2ApproxSq(FfxFloat16 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FfxFloat16(4.0f)));
}
#endif //FFX_HALF
|
|
|
|
// Lanczos-2 approximation on a (non-squared) distance x; the squared value is
// forwarded without clamping.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    FfxFloat32 fDistanceSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistanceSq);
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload: squares x and forwards it without clamping.
FfxFloat16 Lanczos2ApproxNoClamp(FfxFloat16 x)
{
    FfxFloat16 fDistanceSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistanceSq);
}
#endif //FFX_HALF
|
|
|
|
// Lanczos-2 approximation on a (non-squared) distance x, using the clamped
// squared-distance variant.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    FfxFloat32 fDistanceSq = x * x;
    return Lanczos2ApproxSq(fDistanceSq);
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload: squares x and evaluates the clamped approximation.
FfxFloat16 Lanczos2Approx(FfxFloat16 x)
{
    FfxFloat16 fDistanceSq = x * x;
    return Lanczos2ApproxSq(fDistanceSq);
}
#endif //FFX_HALF
|
|
|
|
// Lanczos-2 weight obtained from SampleLanczos2Weight (defined outside this
// header), queried with the absolute value of x.
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    FfxFloat32 fDistance = abs(x);
    return SampleLanczos2Weight(fDistance);
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload: queries SampleLanczos2Weight with the absolute value of x.
FfxFloat16 Lanczos2_UseLUT(FfxFloat16 x)
{
    FfxFloat16 fDistance = abs(x);
    return SampleLanczos2Weight(fDistance);
}
#endif //FFX_HALF
|
|
|
|
// Lanczos2_AllowLUT: normalized 4-tap Lanczos-2 filter.
// With FFX_FSR2_OPTION_USE_LANCZOS_LUT set, the tap weights come from
// Lanczos2_UseLUT; otherwise the name maps onto the analytic Lanczos2 overloads.
// Tap distances are -1 - t, -t, 1 - t and 2 - t for fColor0..fColor3.
#if FFX_FSR2_OPTION_USE_LANCZOS_LUT
FfxFloat32x4 Lanczos2_AllowLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

    FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
#if FFX_HALF
FfxFloat16x4 Lanczos2_AllowLUT(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2_UseLUT(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2_UseLUT(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2_UseLUT(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2_UseLUT(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
#endif //FFX_HALF
#else //FFX_FSR2_OPTION_USE_LANCZOS_LUT
#define Lanczos2_AllowLUT Lanczos2
#endif //FFX_FSR2_OPTION_USE_LANCZOS_LUT
|
|
|
|
// Normalized 4-tap Lanczos-2 filter using the analytic kernel.
// Tap distances are -1 - t, -t, 1 - t and 2 - t for fColor0..fColor3, so
// fColor1 has distance 0 at t = 0 and fColor2 has distance 0 at t = 1.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2(-1.f - t);
    FfxFloat32 fW1 = Lanczos2(-0.f - t);
    FfxFloat32 fW2 = Lanczos2(+1.f - t);
    FfxFloat32 fW3 = Lanczos2(+2.f - t);

    FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
|
|
|
|
// Separable Lanczos-2 filter over a 4x4 sample set, followed by deringing.
// Rows are filtered along x with fPxFrac.x, the row results along y with
// fPxFrac.y. The result is then clamped component-wise to the min/max of the
// four central samples (fColor11, fColor21, fColor12, fColor22).
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fFiltered = Lanczos2_AllowLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing bounds from the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
|
|
|
#if FFX_HALF
|
|
// 16-bit float overload of the normalized 4-tap Lanczos-2 filter.
// Tap distances are -1 - t, -t, 1 - t and 2 - t for fColor0..fColor3.
FfxFloat16x4 Lanczos2(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
|
|
|
|
// 16-bit float overload of the separable 4x4 Lanczos-2 filter with deringing.
// The filtered value is clamped component-wise to the min/max of the four
// central samples (fColor11, fColor21, fColor12, fColor22).
FfxFloat16x4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    FfxFloat16x4 fRow0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat16x4 fRow1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat16x4 fRow2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat16x4 fRow3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat16x4 fFiltered = Lanczos2_AllowLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing bounds from the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat16x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat16x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
|
#endif //FFX_HALF
|
|
|
|
// Normalized 4-tap filter using the polynomial Lanczos-2 approximation.
// Tap distances are -1 - t, -t, 1 - t and 2 - t for fColor0..fColor3; the
// unclamped kernel variant is used for every tap.
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
    FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
    FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
    FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);

    FfxFloat32x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
|
|
|
|
#if FFX_HALF
// 16-bit float overload of the normalized 4-tap approximate Lanczos-2 filter.
// Tap distances are -1 - t, -t, 1 - t and 2 - t for fColor0..fColor3.
FfxFloat16x4 Lanczos2Approx(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2ApproxNoClamp(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2ApproxNoClamp(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2ApproxNoClamp(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2ApproxNoClamp(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedColor = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightSum     = fW0 + fW1 + fW2 + fW3;

    // Dividing by the weight sum normalizes the filter.
    return fWeightedColor / fWeightSum;
}
#endif //FFX_HALF
|
|
|
|
// Separable approximate Lanczos-2 filter over a 4x4 sample set, followed by
// deringing. Rows are filtered along x with fPxFrac.x, the row results along y
// with fPxFrac.y. The result is clamped component-wise to the min/max of the
// four central samples (fColor11, fColor21, fColor12, fColor22).
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing bounds from the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
|
|
|
#if FFX_HALF
|
|
// 16-bit float overload of the separable 4x4 approximate Lanczos-2 filter with
// deringing. The filtered value is clamped component-wise to the min/max of
// the four central samples (fColor11, fColor21, fColor12, fColor22).
FfxFloat16x4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FfxFloat16x2 fPxFrac)
{
    FfxFloat16x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat16x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat16x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat16x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat16x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing bounds from the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat16x4 fLowerBound = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat16x4 fUpperBound = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fLowerBound, fUpperBound);
}
|
|
|
|
// Applies iPxOffset to iPxSample and clamps only on the side the offset points
// towards: a non-positive offset is clamped against 0, a positive offset
// against iTextureSize - 1.
// Assumes iPxSample is already in range and iPxOffset is a compile time constant.
FfxInt16x2 ClampLoadBicubic(FfxInt16x2 iPxSample, FfxInt16x2 iPxOffset, FfxInt16x2 iTextureSize)
{
    FfxInt16x2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x <= 0)
    {
        iCoord.x = ffxMax(iCoord.x, FfxInt16(0));
    }
    else
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FfxInt16(1));
    }

    if (iPxOffset.y <= 0)
    {
        iCoord.y = ffxMax(iCoord.y, FfxInt16(0));
    }
    else
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FfxInt16(1));
    }

    return iCoord;
}
|
|
#endif //FFX_HALF
|
|
|
|
// 32-bit overload. Applies iPxOffset to iPxSample and clamps only on the side
// the offset points towards: a non-positive offset is clamped against 0, a
// positive offset against iTextureSize - 1. No clamping is done on the
// opposite side.
FfxInt32x2 ClampLoadBicubic(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x <= 0)
    {
        iCoord.x = ffxMax(iCoord.x, 0);
    }
    else
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
    }

    if (iPxOffset.y <= 0)
    {
        iCoord.y = ffxMax(iCoord.y, 0);
    }
    else
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
    }

    return iCoord;
}
|
|
|
|
// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`.
// The generated function fills the 16 fColorXY fields of SampleType by calling
// LoadTexture on iPxSample + (X - 1, Y - 1), with every address routed through
// ClampLoadBicubic.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, AddrType, Name, LoadTexture)                   \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                              \
    {                                                                                                       \
        SampleType Fetched;                                                                                 \
                                                                                                            \
        /* y offset -1 */                                                                                   \
        Fetched.fColor00 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, -1), iTextureSize));        \
        Fetched.fColor10 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, -1), iTextureSize));        \
        Fetched.fColor20 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, -1), iTextureSize));        \
        Fetched.fColor30 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, -1), iTextureSize));        \
                                                                                                            \
        /* y offset +0 */                                                                                   \
        Fetched.fColor01 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +0), iTextureSize));        \
        Fetched.fColor11 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +0), iTextureSize));        \
        Fetched.fColor21 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +0), iTextureSize));        \
        Fetched.fColor31 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +0), iTextureSize));        \
                                                                                                            \
        /* y offset +1 */                                                                                   \
        Fetched.fColor02 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +1), iTextureSize));        \
        Fetched.fColor12 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +1), iTextureSize));        \
        Fetched.fColor22 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +1), iTextureSize));        \
        Fetched.fColor32 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +1), iTextureSize));        \
                                                                                                            \
        /* y offset +2 */                                                                                   \
        Fetched.fColor03 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +2), iTextureSize));        \
        Fetched.fColor13 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +2), iTextureSize));        \
        Fetched.fColor23 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +2), iTextureSize));        \
        Fetched.fColor33 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +2), iTextureSize));        \
                                                                                                            \
        return Fetched;                                                                                     \
    }
|
|
|
|
// Declares a 4x4 fetch function returning FetchedBicubicSamples, addressed with FfxInt32x2.
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxInt32x2, Name, LoadTexture)
|
|
|
|
// Declares a 4x4 fetch function returning FetchedBicubicSamplesMin16, addressed with FFX_MIN16_I2.
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_I2, Name, LoadTexture)
|
|
|
|
// Declares `FetchedBilinearSamples Name(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iTextureSize)`.
// The generated function fills fColorXY by calling LoadTexture on
// iPxSample + (X, Y), with every address routed through ClampLoad (defined
// outside this header).
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture)                                               \
    FetchedBilinearSamples Name(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iTextureSize)                         \
    {                                                                                                      \
        FetchedBilinearSamples Fetched;                                                                    \
        /* y offset +0 */                                                                                  \
        Fetched.fColor00 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +0), iTextureSize));          \
        Fetched.fColor10 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +0), iTextureSize));          \
        /* y offset +1 */                                                                                  \
        Fetched.fColor01 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +1), iTextureSize));          \
        Fetched.fColor11 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +1), iTextureSize));          \
        return Fetched;                                                                                    \
    }
|
|
|
|
// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// The generated function converts the UV to a texel position (UV * size - 0.5),
// splits it into an integer base texel and a fractional part, fetches the
// neighbourhood with FetchSamples and filters it with InterpolateSamples.
//
// BE CAREFUL: there are precision issues here. A position meant to be
// (3253, 125) commonly evaluates to something like (3252.9989778, 125.001102),
// so the floored base texel can jitter between neighbouring texels.
//
// NOTE(review): the body is identical to DeclareCustomTextureSampleMin16,
// including the FFX_MIN16_* casts on the arguments - confirm this is intended.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                              \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                  \
    {                                                                                                                   \
        FfxFloat32x2 fTexelPos  = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f);               \
        FfxInt32x2   iTexelBase = FfxInt32x2(floor(fTexelPos));                                                         \
        FfxFloat32x2 fTexelFrac = ffxFract(fTexelPos);                                                                  \
        return FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iTexelBase), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fTexelFrac))); \
    }
|
|
|
|
// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// The generated function converts the UV to a texel position (UV * size - 0.5),
// splits it into an integer base texel and a fractional part, then passes both
// to FetchSamples / InterpolateSamples after casting them to the FFX_MIN16_*
// types. The interpolated value is converted back to FfxFloat32x4.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                         \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                                  \
    {                                                                                                                   \
        FfxFloat32x2 fTexelPos  = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f);               \
        FfxInt32x2   iTexelBase = FfxInt32x2(floor(fTexelPos));                                                         \
        FfxFloat32x2 fTexelFrac = ffxFract(fTexelPos);                                                                  \
        return FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iTexelBase), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fTexelFrac))); \
    }
|
|
|
|
#endif //!defined( FFX_FSR2_SAMPLE_H )
|