// This file is part of the FidelityFX SDK. // // Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#ifndef FFX_FSR2_SAMPLE_H
#define FFX_FSR2_SAMPLE_H

// suppress warnings
#ifdef FFX_HLSL
#pragma warning(disable: 4008) // potentially divide by zero
#endif //FFX_HLSL

// 2x2 texel footprint consumed by Bilinear().
// Member naming is fColorXY: X selects the column, Y selects the row.
struct FetchedBilinearSamples
{
    // row 0
    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;

    // row 1
    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
};

// 4x4 texel footprint consumed by the bicubic / Lanczos filters below.
// Member naming is fColorXY: X selects the column (0..3), Y selects the row (0..3).
struct FetchedBicubicSamples
{
    // row 0
    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;
    FfxFloat32x4 fColor20;
    FfxFloat32x4 fColor30;

    // row 1
    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
    FfxFloat32x4 fColor21;
    FfxFloat32x4 fColor31;

    // row 2
    FfxFloat32x4 fColor02;
    FfxFloat32x4 fColor12;
    FfxFloat32x4 fColor22;
    FfxFloat32x4 fColor32;

    // row 3
    FfxFloat32x4 fColor03;
    FfxFloat32x4 fColor13;
    FfxFloat32x4 fColor23;
    FfxFloat32x4 fColor33;
};

#if FFX_HALF
// Half-precision twin of FetchedBicubicSamples (same member layout).
struct FetchedBicubicSamplesMin16
{
    // row 0
    FfxFloat16x4 fColor00;
    FfxFloat16x4 fColor10;
    FfxFloat16x4 fColor20;
    FfxFloat16x4 fColor30;

    // row 1
    FfxFloat16x4 fColor01;
    FfxFloat16x4 fColor11;
    FfxFloat16x4 fColor21;
    FfxFloat16x4 fColor31;

    // row 2
    FfxFloat16x4 fColor02;
    FfxFloat16x4 fColor12;
    FfxFloat16x4 fColor22;
    FfxFloat16x4 fColor32;

    // row 3
    FfxFloat16x4 fColor03;
    FfxFloat16x4 fColor13;
    FfxFloat16x4 fColor23;
    FfxFloat16x4 fColor33;
};
#else //FFX_HALF
// Without half support the "Min16" footprint is simply the full-precision one.
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#endif //FFX_HALF

// Linear interpolation: returns A at t = 0 and B at t = 1.
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}

// Bilinear interpolation of a 2x2 footprint.
// fPxFrac.x blends across each row, fPxFrac.y then blends the two row results.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);

    return Linear(fRow0, fRow1, fPxFrac.y);
}

// SEE: ../Interpolation/CatmullRom.ipynb, t=0 -> B, t=1 -> C
// Evaluates the cubic  k3*t^3 + k2*t^2 + k1*t + k0  built from the four control values A..D.
FfxFloat32x4 CubicCatmullRom(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32x4 C, FfxFloat32x4 D, FfxFloat32 t)
{
    // Powers of the interpolation parameter.
    FfxFloat32 fTSq   = t * t;
    FfxFloat32 fTCube = fTSq * t;

    // Polynomial coefficients, highest order first.
    FfxFloat32x4 k3 = -A / 2.f + (3.f * B) / 2.f - (3.f * C) / 2.f + D / 2.f;
    FfxFloat32x4 k2 = A - (5.f * B) / 2.f + 2.f * C - D / 2.f;
    FfxFloat32x4 k1 = -A / 2.f + C / 2.f;
    FfxFloat32x4 k0 = B;

    return k3 * fTCube + k2 * fTSq + k1 * t + k0;
}

// Separable Catmull-Rom over a 4x4 footprint: each row is filtered with fPxFrac.x,
// then the four row results are filtered with fPxFrac.y.
FfxFloat32x4 BicubicCatmullRom(FetchedBicubicSamples BicubicSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = CubicCatmullRom(BicubicSamples.fColor00, BicubicSamples.fColor10, BicubicSamples.fColor20, BicubicSamples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = CubicCatmullRom(BicubicSamples.fColor01, BicubicSamples.fColor11, BicubicSamples.fColor21, BicubicSamples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = CubicCatmullRom(BicubicSamples.fColor02, BicubicSamples.fColor12, BicubicSamples.fColor22, BicubicSamples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = CubicCatmullRom(BicubicSamples.fColor03, BicubicSamples.fColor13, BicubicSamples.fColor23, BicubicSamples.fColor33, fPxFrac.x);

    return CubicCatmullRom(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
}

// Lanczos-2 kernel weight: sin(pi*x)/(pi*x) * sin(pi*x/2)/(pi*x/2).
// Returns 1 when |x| is below FSR2_EPSILON, which sidesteps the 0/0 at the origin.
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

    FfxFloat32 fPiX     = PI * x;
    FfxFloat32 fHalfPiX = 0.5f * PI * x;

    FfxFloat32 fSinc   = sin(fPiX) / fPiX;
    FfxFloat32 fWindow = sin(fHalfPiX) / fHalfPiX;

    return abs(x) < FSR2_EPSILON ? 1.f : fSinc * fWindow;
}

#if FFX_HALF
// Half-precision overload of the Lanczos-2 kernel weight.
FfxFloat16 Lanczos2(FfxFloat16 x)
{
    const FFX_MIN16_F PI = FfxFloat16(3.141592653589793f); // TODO: share SDK constants

    FfxFloat16 fPiX     = PI * x;
    FfxFloat16 fHalfPiX = FfxFloat16(0.5f) * PI * x;

    FfxFloat16 fSinc   = sin(fPiX) / fPiX;
    FfxFloat16 fWindow = sin(fHalfPiX) / fHalfPiX;

    return abs(x) < FSR2_EPSILON ? FfxFloat16(1.f) : fSinc * fWindow;
}
#endif //FFX_HALF

// FSR1 lanczos approximation. Input is x*x and must be <= 4.
// Polynomial Lanczos-2 approximation evaluated on the squared distance x2 (= x*x).
// No range clamping is done here; the caller must keep x2 <= 4.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;

    FfxFloat32 fBase   = (25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1);
    FfxFloat32 fWindow = fTermB * fTermB;

    return fBase * fWindow;
}

#if FFX_HALF
// Half-precision overload of Lanczos2ApproxSqNoClamp.
FfxFloat16 Lanczos2ApproxSqNoClamp(FfxFloat16 x2)
{
    FfxFloat16 fTermA = FfxFloat16(2.0f / 5.0f) * x2 - FfxFloat16(1);
    FfxFloat16 fTermB = FfxFloat16(1.0f / 4.0f) * x2 - FfxFloat16(1);

    FfxFloat16 fBase   = FfxFloat16(25.0f / 16.0f) * fTermA * fTermA - FfxFloat16(25.0f / 16.0f - 1);
    FfxFloat16 fWindow = fTermB * fTermB;

    return fBase * fWindow;
}
#endif //FFX_HALF

// Same approximation, but the squared distance is first limited to 4.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}

#if FFX_HALF
FfxFloat16 Lanczos2ApproxSq(FfxFloat16 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FfxFloat16(4.0f)));
}
#endif //FFX_HALF

// Unclamped approximation taking the (signed) distance x rather than its square.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    FfxFloat32 fDistSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistSq);
}

#if FFX_HALF
FfxFloat16 Lanczos2ApproxNoClamp(FfxFloat16 x)
{
    FfxFloat16 fDistSq = x * x;
    return Lanczos2ApproxSqNoClamp(fDistSq);
}
#endif //FFX_HALF

// Clamped approximation taking the (signed) distance x rather than its square.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    FfxFloat32 fDistSq = x * x;
    return Lanczos2ApproxSq(fDistSq);
}

#if FFX_HALF
FfxFloat16 Lanczos2Approx(FfxFloat16 x)
{
    FfxFloat16 fDistSq = x * x;
    return Lanczos2ApproxSq(fDistSq);
}
#endif //FFX_HALF

// Kernel weight obtained through SampleLanczos2Weight(), indexed by |x|.
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    FfxFloat32 fAbsDist = abs(x);
    return SampleLanczos2Weight(fAbsDist);
}

#if FFX_HALF
FfxFloat16 Lanczos2_UseLUT(FfxFloat16 x)
{
    FfxFloat16 fAbsDist = abs(x);
    return SampleLanczos2Weight(fAbsDist);
}
#endif //FFX_HALF

#if FFX_FSR2_OPTION_USE_LANCZOS_LUT
// 4-tap normalized filter whose weights come from Lanczos2_UseLUT().
// The taps sit at offsets -1, 0, +1, +2 and t is the fractional position between taps 1 and 2.
FfxFloat32x4 Lanczos2_AllowLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}

#if FFX_HALF
FfxFloat16x4 Lanczos2_AllowLUT(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2_UseLUT(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2_UseLUT(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2_UseLUT(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2_UseLUT(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}
#endif //FFX_HALF
#else //FFX_FSR2_OPTION_USE_LANCZOS_LUT
// LUT disabled: route every Lanczos2_AllowLUT call to the analytic Lanczos2 overloads.
#define Lanczos2_AllowLUT Lanczos2
#endif //FFX_FSR2_OPTION_USE_LANCZOS_LUT

// 4-tap normalized filter using the analytic Lanczos2() weights.
// The taps sit at offsets -1, 0, +1, +2 and t is the fractional position between taps 1 and 2.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2(-1.f - t);
    FfxFloat32 fW1 = Lanczos2(-0.f - t);
    FfxFloat32 fW2 = Lanczos2(+1.f - t);
    FfxFloat32 fW3 = Lanczos2(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}

// Separable Lanczos-2 over a 4x4 footprint (rows with fPxFrac.x, then columns with fPxFrac.y),
// followed by deringing: the result is clamped to the min/max of the four central texels.
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    FfxFloat32x4 fFiltered = Lanczos2_AllowLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fDeringingMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fDeringingMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fDeringingMin, fDeringingMax);
}

#if FFX_HALF
// Half-precision 4-tap normalized filter using the analytic Lanczos2() weights.
FfxFloat16x4 Lanczos2(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}

// Half-precision separable Lanczos-2 over a 4x4 footprint with deringing.
FfxFloat16x4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    FfxFloat16x4 fRow0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat16x4 fRow1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat16x4 fRow2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat16x4 fRow3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    FfxFloat16x4 fFiltered = Lanczos2_AllowLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing
    // TODO: only use 4 by checking jitter
    FfxFloat16x4 fDeringingMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat16x4 fDeringingMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fDeringingMin, fDeringingMax);
}
#endif //FFX_HALF

// 4-tap normalized filter using the unclamped polynomial approximation for the weights.
// The taps sit at offsets -1, 0, +1, +2 and t is the fractional position between taps 1 and 2.
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
    FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
    FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
    FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}

#if FFX_HALF
FfxFloat16x4 Lanczos2Approx(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t)
{
    FfxFloat16 fW0 = Lanczos2ApproxNoClamp(FfxFloat16(-1.f) - t);
    FfxFloat16 fW1 = Lanczos2ApproxNoClamp(FfxFloat16(-0.f) - t);
    FfxFloat16 fW2 = Lanczos2ApproxNoClamp(FfxFloat16(+1.f) - t);
    FfxFloat16 fW3 = Lanczos2ApproxNoClamp(FfxFloat16(+2.f) - t);

    FfxFloat16x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat16   fWeightTotal = fW0 + fW1 + fW2 + fW3;

    return fWeightedSum / fWeightTotal;
}
#endif //FFX_HALF

// Separable approximate Lanczos-2 over a 4x4 footprint (rows with fPxFrac.x, then columns
// with fPxFrac.y), followed by deringing against the four central texels.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fDeringingMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat32x4 fDeringingMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fDeringingMin, fDeringingMax);
}

// NOTE: the FFX_HALF region opened here stays open; it is closed further down,
// after the 16-bit ClampLoadBicubic overload.
#if FFX_HALF
// Half-precision separable approximate Lanczos-2 over a 4x4 footprint with deringing.
FfxFloat16x4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FfxFloat16x2 fPxFrac)
{
    FfxFloat16x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat16x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat16x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat16x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    FfxFloat16x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing
    // TODO: only use 4 by checking jitter
    FfxFloat16x4 fDeringingMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
    FfxFloat16x4 fDeringingMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);

    return clamp(fFiltered, fDeringingMin, fDeringingMax);
}

// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
// 16-bit overload; it sits inside the FFX_HALF region opened earlier in the file.
// Only the side the offset can push past is clamped: offsets <= 0 are limited to 0 from
// below, offsets > 0 are limited to iTextureSize - 1 from above.
FfxInt16x2 ClampLoadBicubic(FfxInt16x2 iPxSample, FfxInt16x2 iPxOffset, FfxInt16x2 iTextureSize)
{
    FfxInt16x2 iPxClamped = iPxSample + iPxOffset;

    iPxClamped.x = (iPxOffset.x > 0) ? ffxMin(iPxClamped.x, iTextureSize.x - FfxInt16(1)) : ffxMax(iPxClamped.x, FfxInt16(0));
    iPxClamped.y = (iPxOffset.y > 0) ? ffxMin(iPxClamped.y, iTextureSize.y - FfxInt16(1)) : ffxMax(iPxClamped.y, FfxInt16(0));

    return iPxClamped;
}
#endif //FFX_HALF

// 32-bit overload of the direction-aware clamp: offsets <= 0 are limited to 0 from below,
// offsets > 0 are limited to iTextureSize - 1 from above.
FfxInt32x2 ClampLoadBicubic(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;

    iPxClamped.x = (iPxOffset.x > 0) ? ffxMin(iPxClamped.x, iTextureSize.x - 1) : ffxMax(iPxClamped.x, 0);
    iPxClamped.y = (iPxOffset.y > 0) ? ffxMin(iPxClamped.y, iTextureSize.y - 1) : ffxMax(iPxClamped.y, 0);

    return iPxClamped;
}

// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which loads the 4x4
// footprint around iPxSample (offsets -1..+2 on both axes) through LoadTexture, clamping
// every address with ClampLoadBicubic.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Fetched; \
        \
        /* y offset -1 */ \
        Fetched.fColor00 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, -1), iTextureSize)); \
        Fetched.fColor10 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, -1), iTextureSize)); \
        Fetched.fColor20 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, -1), iTextureSize)); \
        Fetched.fColor30 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, -1), iTextureSize)); \
        \
        /* y offset 0 */ \
        Fetched.fColor01 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +0), iTextureSize)); \
        Fetched.fColor11 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +0), iTextureSize)); \
        Fetched.fColor21 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +0), iTextureSize)); \
        Fetched.fColor31 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +0), iTextureSize)); \
        \
        /* y offset +1 */ \
        Fetched.fColor02 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +1), iTextureSize)); \
        Fetched.fColor12 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +1), iTextureSize)); \
        Fetched.fColor22 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +1), iTextureSize)); \
        Fetched.fColor32 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +1), iTextureSize)); \
        \
        /* y offset +2 */ \
        Fetched.fColor03 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +2), iTextureSize)); \
        Fetched.fColor13 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +2), iTextureSize)); \
        Fetched.fColor23 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +2), iTextureSize)); \
        Fetched.fColor33 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +2), iTextureSize)); \
        \
        return Fetched; \
    }

// Full-precision 4x4 fetcher addressed with FfxInt32x2.
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxInt32x2, Name, LoadTexture)

// Min16 4x4 fetcher addressed with FFX_MIN16_I2.
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_I2, Name, LoadTexture)

// Declares `FetchedBilinearSamples Name(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iTextureSize)`, which
// loads the 2x2 footprint at offsets 0..+1 through LoadTexture, clamping addresses with ClampLoad.
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
    FetchedBilinearSamples Name(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iTextureSize) \
    { \
        FetchedBilinearSamples Fetched; \
        Fetched.fColor00 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +0), iTextureSize)); \
        Fetched.fColor10 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +0), iTextureSize)); \
        Fetched.fColor01 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +1), iTextureSize)); \
        Fetched.fColor11 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +1), iTextureSize)); \
        return Fetched; \
    }

// BE CAREFUL: there are some precision issues, and (3253, 125) leading to (3252.9989778, 125.001102)
// is common, so iPxSample can "jitter"
// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`: converts the UV to
// a texel-centre-relative pixel position, splits it into integer texel and fraction, fetches the
// footprint with FetchSamples and filters it with InterpolateSamples.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f); \
        FfxInt32x2   iPxSample = FfxInt32x2(floor(fPxSample)); \
        FfxFloat32x2 fPxFrac   = ffxFract(fPxSample); \
        return FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iPxSample), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fPxFrac))); \
    }

// Min16 flavour of DeclareCustomTextureSample; the generated function has the same signature.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f); \
        FfxInt32x2   iPxSample = FfxInt32x2(floor(fPxSample)); \
        FfxFloat32x2 fPxFrac   = ffxFract(fPxSample); \
        return FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iPxSample), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fPxFrac))); \
    }

#endif //!defined( FFX_FSR2_SAMPLE_H )