@ -44,6 +44,26 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fK
return fSampleWeight ;
return fSampleWeight ;
}
}
# if FFX_HALF
// Half-precision overload: returns the Lanczos-2 weight of a source sample.
//   fSrcSampleOffset - offset of the sample, scaled below by fKernelWeight.
//   fKernelWeight    - scalar bias multiplied onto both offset components.
// The evaluation method is chosen at compile time by
// FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
// (0 = reference, 1 = LUT, 2 = approximate).
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
    const FFX_MIN16_F2 fBiasedOffset = fSrcSampleOffset * fKernelWeight.xx;

#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    return Lanczos2(length(fBiasedOffset));
#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    return Lanczos2_UseLUT(length(fBiasedOffset));
#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    // The approximation is fed the squared length, so no square root is taken here.
    // TODO (carried over from the original, still to be tested): Lanczos2Sq_UseLUT on the
    // same squared length might save the reciprocal square root computation.
    return Lanczos2ApproxSq(dot(fBiasedOffset, fBiasedOffset));
#else
#error "Invalid Lanczos type"
#endif
}
# endif
FfxFloat32 ComputeMaxKernelWeight ( const AccumulationPassCommonParams params , FFX_PARAMETER_INOUT AccumulationPassData data ) {
FfxFloat32 ComputeMaxKernelWeight ( const AccumulationPassCommonParams params , FFX_PARAMETER_INOUT AccumulationPassData data ) {
const FfxFloat32 fKernelSizeBias = 1.0f + ( 1.0f / FfxFloat32x2 ( DownscaleFactor ( ) ) - 1.0f ) . x ;
const FfxFloat32 fKernelSizeBias = 1.0f + ( 1.0f / FfxFloat32x2 ( DownscaleFactor ( ) ) - 1.0f ) . x ;
@ -59,6 +79,225 @@ FfxFloat32x3 LoadPreparedColor(FfxInt32x2 iSamplePos)
return fPreparedYCoCg ;
return fPreparedYCoCg ;
}
}
// Selects the paired 16-bit code path used further down in this file.
// It is set to 1 only when ALL of the following hold, otherwise to 0:
//   - FFX_HALF is non-zero,
//   - the Lanczos type option is 2 (LANCZOS_TYPE_APPROXIMATE),
//   - __XBOX_SCARLETT is defined,
//   - __XBATG_EXTRA_16_BIT_OPTIMISATION is defined and equal to 1.
// The macro is always defined, so later code can test it with a plain #if.
# if FFX_HALF && (FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1)
# define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1
# else
# define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0
# endif
# if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
// Loads two input-color texels and returns them channel by channel, packed as pairs.
//   r, g, b     - outputs; .x holds the channel of the texel at iSamplePos0,
//                 .y the channel of the texel at iSamplePos1.
//   iSamplePos0 - coordinate passed to LoadInputColor for the first texel.
//   iSamplePos1 - coordinate passed to LoadInputColor for the second texel.
// Each channel is clamped to be non-negative and then multiplied by Exposure().
void LoadPreparedColorPairedRgb(FFX_PARAMETER_OUT FFX_MIN16_F2 r,
                                FFX_PARAMETER_OUT FFX_MIN16_F2 g,
                                FFX_PARAMETER_OUT FFX_MIN16_F2 b,
                                FfxInt32x2                     iSamplePos0,
                                FfxInt32x2                     iSamplePos1)
{
    const FFX_MIN16_F3 texel0 = FFX_MIN16_F3(LoadInputColor(iSamplePos0));
    const FFX_MIN16_F3 texel1 = FFX_MIN16_F3(LoadInputColor(iSamplePos1));

    // Interleave the two texels per channel and clamp negative values to zero.
    const FFX_MIN16_F2 clampedR = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(texel0.r, texel1.r));
    const FFX_MIN16_F2 clampedG = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(texel0.g, texel1.g));
    const FFX_MIN16_F2 clampedB = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(texel0.b, texel1.b));

    // Apply exposure and narrow the result back to the paired 16-bit type.
    r = FFX_MIN16_F2(clampedR * Exposure());
    g = FFX_MIN16_F2(clampedG * Exposure());
    b = FFX_MIN16_F2(clampedB * Exposure());
}
// Tonemaps two colors at once, in place: every channel is divided by
// (max(0, r, g, b) + 1), evaluated independently for the .x and the .y color.
//   r, g, b - in/out; each FFX_MIN16_F2 carries one channel of the two colors.
// The reciprocal is evaluated in 32-bit float and converted back to 16-bit
// before it is multiplied onto the channels.
void TonemapPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b)
{
    // Largest channel per color, floored at zero.
    const FFX_MIN16_F2 maxOfR   = ffxMax(0.0, r);
    const FFX_MIN16_F2 maxOfRG  = ffxMax(maxOfR, g);
    const FFX_MIN16_F2 maxOfRGB = ffxMax(maxOfRG, b);

    const FFX_MIN16_F2 divisorHalf = maxOfRGB + FFX_MIN16_F2(1.0, 1.0);

    // NOTE (original author's expectation): 2 x v_cvt_f32_f16
    const FfxFloat32x2 divisorFloat = FfxFloat32x2(divisorHalf);

    // NOTE (original author's expectation): 2 x v_rcp_f32
    const FfxFloat32x2 scaleFloat = FfxFloat32x2(1.0, 1.0) / divisorFloat;

    // NOTE (original author's expectation): 2 x v_cvt_f16_f32
    const FFX_MIN16_F2 scaleHalf = FFX_MIN16_F2(scaleFloat);

    r = r * scaleHalf;
    g = g * scaleHalf;
    b = b * scaleHalf;
}
// Converts two RGB colors to YCoCg in place.
//   r, g, b - in/out; on entry the R, G and B channels of the two colors,
//             on exit Y (in r), Co (in g) and Cg (in b).
//
// Reference conversion:
//     Y  =  0.25 * R + 0.5 * G + 0.25 * B
//     Co =  0.5  * R           - 0.5  * B
//     Cg = -0.25 * R + 0.5 * G - 0.25 * B
//
// With the shared term  RplusBdiv4 = 0.25 * (R + B)  this becomes:
//     Y  = G * 0.5 + RplusBdiv4
//     Co = 2 * RplusBdiv4 - B
//     Cg = G * 0.5 - RplusBdiv4
void RGBToYCoCgPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b)
{
    // NOTE (original author's expectation): one packed add + one packed multiply.
    const FFX_MIN16_F2 quarterRedPlusBlue = (r + b) * 0.25;

    // Snapshot the inputs that are still needed after r and g have been overwritten.
    const FFX_MIN16_F2 green = g;
    const FFX_MIN16_F2 blue  = b;

    // NOTE (original author's expectation): three packed FMAs.
    r = green * 0.5 + quarterRedPlusBlue;   // Y
    g = quarterRedPlusBlue * 2.0 - blue;    // Co
    b = green * 0.5 - quarterRedPlusBlue;   // Cg
}
// Returns (min, max) of one channel over a set of paired samples.
//   sampleCenter     - center sample; only its .x component is read.
//   sample0..sample3 - four sample pairs; both components of each take part.
// Result: .x is the minimum, .y is the maximum.
FFX_MIN16_F2 Compute3x3SamplesMinMaxPaired(FFX_PARAMETER_IN FFX_MIN16_F2 sampleCenter,
                                           FFX_PARAMETER_IN FFX_MIN16_F2 sample0,
                                           FFX_PARAMETER_IN FFX_MIN16_F2 sample1,
                                           FFX_PARAMETER_IN FFX_MIN16_F2 sample2,
                                           FFX_PARAMETER_IN FFX_MIN16_F2 sample3)
{
    // Component-wise reduction of the four pairs down to a single pair.
    const FFX_MIN16_F2 pairMin = ffxMin(ffxMin(sample0, sample1), ffxMin(sample2, sample3));
    const FFX_MIN16_F2 pairMax = ffxMax(ffxMax(sample0, sample1), ffxMax(sample2, sample3));

    // Fold the two remaining lanes together with the center sample.
    const FFX_MIN16_F lowest  = ffxMin3Half(pairMin.x, pairMin.y, sampleCenter.x);
    const FFX_MIN16_F highest = ffxMax3Half(pairMax.x, pairMax.y, sampleCenter.x);

    return FFX_MIN16_F2(lowest, highest);
}
// Builds a pair of 16-bit floats from two booleans by bit pattern.
// 0x3c00 is the IEEE half-precision encoding of 1.0; it is placed in the low
// 16 bits for x and in the high 16 bits for y, all-zero bits otherwise.
// NOTE(review): assumes __XB_AsHalf reinterprets a 32-bit uint as two packed
// halves (.x = low 16 bits, .y = high 16 bits), giving (x ? 1 : 0, y ? 1 : 0).
// Confirm against the platform intrinsic documentation.
FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y)
{
    const uint lowWordBits  = x ? 0x00003c00 : 0x00000000;
    const uint highWordBits = y ? 0x3c000000 : 0x00000000;

    const FFX_MIN16_F firstLane  = __XB_AsHalf(lowWordBits).x;
    const FFX_MIN16_F secondLane = __XB_AsHalf(highWordBits).y;

    return FFX_MIN16_F2(firstLane, secondLane);
}
// Accumulator for the paired 16-bit upsampling path.
//
// Every running sum is an FFX_MIN16_F2 so that two samples are accumulated per
// call, one in .x and one in .y. The finalize* methods add the two lanes
// together to produce the scalar totals.
//
// Usage as implied by the methods:
//   setKernelBiasAndRectificationCurveBias -> initBox / initUpscaledColor
//   -> addBoxSample / addUpscaledColorSample (repeated) -> finalizeBox / finalizeUpscaledColor
struct PairedRectificationBoxAndAccumulatedColorAndWeight
{
    // Rectification box: weighted sum of the samples, per channel.
    FFX_MIN16_F2 boxCenterR;
    FFX_MIN16_F2 boxCenterG;
    FFX_MIN16_F2 boxCenterB;

    // Rectification box: weighted sum of the squared samples, per channel.
    FFX_MIN16_F2 boxVecR;
    FFX_MIN16_F2 boxVecG;
    FFX_MIN16_F2 boxVecB;

    // Rectification box: sum of the box sample weights.
    FFX_MIN16_F2 fBoxCenterWeight;

    // Upscaled color: Lanczos-weighted sum of the samples, per channel.
    FFX_MIN16_F2 fColorR;
    FFX_MIN16_F2 fColorG;
    FFX_MIN16_F2 fColorB;

    // Upscaled color: sum of the Lanczos sample weights.
    FFX_MIN16_F2 fWeight;

    // Square of the kernel bias; multiplied onto squared sample offsets.
    FFX_MIN16_F fKernelBiasSq;

    // Factor applied to the squared offset inside exp() for the box weights.
    FFX_MIN16_F fRectificationCurveBias;

    // Stores the two scalar parameters. The kernel bias is squared here, in
    // 32-bit float, before being narrowed to 16 bits.
    void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias)
    {
        fRectificationCurveBias = FFX_MIN16_F(rectificationCurveBias);
        fKernelBiasSq           = FFX_MIN16_F(kernelBias * kernelBias);
    }

    // Starts the upscaled-color sums from a single sample.
    // Only lane .x receives a weight; lane .y gets weight 0.0, so whatever is
    // in the .y components of sampleR/G/B contributes nothing.
    void initUpscaledColor(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB)
    {
#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
        const FFX_MIN16_F2 lanczosWeightPair = FFX_MIN16_F2(
            PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq).x,
            0.0
        );
#else
#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far"
#endif
        const FFX_MIN16_F2 weightPair = fOnScreenWeight * lanczosWeightPair;

        fWeight = weightPair;
        fColorR = sampleR * weightPair;
        fColorG = sampleG * weightPair;
        fColorB = sampleB * weightPair;
    }

    // Starts the rectification-box sums from a single sample.
    // As in initUpscaledColor, lane .y is given weight 0.0.
    void initBox(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB)
    {
        const FFX_MIN16_F2 boxWeightPair = FFX_MIN16_F2(
            exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight,
            0.0
        );

        const FFX_MIN16_F2 weightedR = sampleR * boxWeightPair;
        const FFX_MIN16_F2 weightedG = sampleG * boxWeightPair;
        const FFX_MIN16_F2 weightedB = sampleB * boxWeightPair;

        fBoxCenterWeight = boxWeightPair;

        boxCenterR = weightedR;
        boxCenterG = weightedG;
        boxCenterB = weightedB;

        boxVecR = sampleR * weightedR;
        boxVecG = sampleG * weightedG;
        boxVecB = sampleB * weightedB;
    }

    // Adds two samples (one per lane) to the upscaled-color sums.
    //   fSrcSampleOffsetSq - squared offset of each of the two samples.
    //   fOnScreenWeight    - per-sample factor multiplied onto the Lanczos weight.
    void addUpscaledColorSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB)
    {
#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
        const FFX_MIN16_F2 lanczosWeightPair = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq);
#else
#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far"
#endif
        const FFX_MIN16_F2 weightPair = fOnScreenWeight * lanczosWeightPair;

        fColorR = fColorR + sampleR * weightPair;
        fColorG = fColorG + sampleG * weightPair;
        fColorB = fColorB + sampleB * weightPair;
        fWeight = fWeight + weightPair;
    }

    // Adds two samples (one per lane) to the rectification-box sums.
    //   fSrcSampleOffsetSq - squared offset of each of the two samples.
    //   fOnScreenWeight    - per-sample factor multiplied onto the box weight.
    void addBoxSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB)
    {
        // NOTE (original author's expectation): ideally 2 x v_fma_mix + 2 x v_exp_f32 + 2 x v_fma_mix
        const FFX_MIN16_F2 boxWeightPair = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight;

        const FFX_MIN16_F2 weightedR = sampleR * boxWeightPair;
        const FFX_MIN16_F2 weightedG = sampleG * boxWeightPair;
        const FFX_MIN16_F2 weightedB = sampleB * boxWeightPair;

        boxCenterR = boxCenterR + weightedR;
        boxCenterG = boxCenterG + weightedG;
        boxCenterB = boxCenterB + weightedB;

        boxVecR = boxVecR + sampleR * weightedR;
        boxVecG = boxVecG + sampleG * weightedG;
        boxVecB = boxVecB + sampleB * weightedB;

        fBoxCenterWeight = fBoxCenterWeight + boxWeightPair;
    }

    // Collapses the two lanes of the color sums.
    //   upscaledColorAndWeight - out; rgb = summed color, a = summed weight.
    void finalizeUpscaledColor(FFX_PARAMETER_OUT FfxFloat32x4 upscaledColorAndWeight)
    {
        upscaledColorAndWeight.r = fColorR.x + fColorR.y;
        upscaledColorAndWeight.g = fColorG.x + fColorG.y;
        upscaledColorAndWeight.b = fColorB.x + fColorB.y;
        upscaledColorAndWeight.a = fWeight.x + fWeight.y;
    }

    // Collapses the two lanes of the box sums.
    //   boxCenterAndVecR/G/B - out; .x = summed boxCenter, .y = summed boxVec of that channel.
    //   boxCenterWeight      - out; summed box weight.
    void finalizeBox(FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecR,
                     FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecG,
                     FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecB,
                     FFX_PARAMETER_OUT FfxFloat32   boxCenterWeight)
    {
        boxCenterAndVecR = FfxFloat32x2(boxCenterR.x + boxCenterR.y, boxVecR.x + boxVecR.y);
        boxCenterAndVecG = FfxFloat32x2(boxCenterG.x + boxCenterG.y, boxVecG.x + boxVecG.y);
        boxCenterAndVecB = FfxFloat32x2(boxCenterB.x + boxCenterB.y, boxVecB.x + boxVecB.y);
        boxCenterWeight  = fBoxCenterWeight.x + fBoxCenterWeight.y;
    }
};
# endif / / #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
void ComputeUpsampledColorAndWeight ( const AccumulationPassCommonParams params , FFX_PARAMETER_INOUT AccumulationPassData data )
void ComputeUpsampledColorAndWeight ( const AccumulationPassCommonParams params , FFX_PARAMETER_INOUT AccumulationPassData data )
{
{
/ / We compute a sliced lanczos filter with 2 lobes ( other slices are accumulated temporally )
/ / We compute a sliced lanczos filter with 2 lobes ( other slices are accumulated temporally )
@ -82,6 +321,90 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F
const FfxBoolean bIsInitialSample = ( params . fAccumulation = = 0.0f ) ;
const FfxBoolean bIsInitialSample = ( params . fAccumulation = = 0.0f ) ;
# if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
/ / Unroll the loop to load samples on Scarlett to help the shader compiler
const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf ( bFlipCol ? __XB_AsUInt ( FFX_MIN16_F2 ( 1 , - 1 ) ) : __XB_AsUInt ( FFX_MIN16_F2 ( - 1 , 1 ) ) ) ;
const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf ( bFlipRow ? __XB_AsUInt ( FFX_MIN16_F2 ( 1 , - 1 ) ) : __XB_AsUInt ( FFX_MIN16_F2 ( - 1 , 1 ) ) ) ;
typedef FfxInt32 FfxTexCoordI ;
typedef FfxInt32x2 FfxTexCoordI2 ;
const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2 ( iSrcInputPos . xx ) + ( bFlipCol ? FfxTexCoordI2 ( 1 , 0 ) : FfxTexCoordI2 ( - 1 , 0 ) ) ;
const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2 ( iSrcInputPos . xx ) + ( bFlipCol ? FfxTexCoordI2 ( - 1 , - 2 ) : FfxTexCoordI2 ( 1 , 2 ) ) ;
const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2 ( iSrcInputPos . yy ) + ( bFlipRow ? FfxTexCoordI2 ( 1 , 0 ) : FfxTexCoordI2 ( - 1 , 0 ) ) ;
const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2 ( iSrcInputPos . yy ) + ( bFlipRow ? FfxTexCoordI2 ( - 1 , - 2 ) : FfxTexCoordI2 ( 1 , 2 ) ) ;
const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2 ( RenderSize ( ) ) - FfxTexCoordI2 ( 1 , 1 ) ;
const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2 (
__XB_Med3_I32 ( iSrcSamplePosX01 . x , 0 , renderSizeLastTexelCoord . x ) ,
__XB_Med3_I32 ( iSrcSamplePosX01 . y , 0 , renderSizeLastTexelCoord . x )
) ;
const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2 (
__XB_Med3_I32 ( iSrcSamplePosX23 . x , 0 , renderSizeLastTexelCoord . x ) ,
__XB_Med3_I32 ( iSrcSamplePosX23 . y , 0 , renderSizeLastTexelCoord . x )
) ;
const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2 (
__XB_Med3_I32 ( iSrcSamplePosY01 . x , 0 , renderSizeLastTexelCoord . y ) ,
__XB_Med3_I32 ( iSrcSamplePosY01 . y , 0 , renderSizeLastTexelCoord . y )
) ;
const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2 (
__XB_Med3_I32 ( iSrcSamplePosY23 . x , 0 , renderSizeLastTexelCoord . y ) ,
__XB_Med3_I32 ( iSrcSamplePosY23 . y , 0 , renderSizeLastTexelCoord . y )
) ;
FFX_MIN16_F2 TopCornerR , BotCornerR , HorzR , VertR , CenterR ;
FFX_MIN16_F2 TopCornerG , BotCornerG , HorzG , VertG , CenterG ;
FFX_MIN16_F2 TopCornerB , BotCornerB , HorzB , VertB , CenterB ;
LoadPreparedColorPairedRgb ( TopCornerR , TopCornerG , TopCornerB ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . x , iSrcSamplePosY01Clamped . x ) ,
FfxTexCoordI2 ( iSrcSamplePosX23Clamped . x , iSrcSamplePosY01Clamped . x )
) ;
LoadPreparedColorPairedRgb ( BotCornerR , BotCornerG , BotCornerB ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . x , iSrcSamplePosY23Clamped . x ) ,
FfxTexCoordI2 ( iSrcSamplePosX23Clamped . x , iSrcSamplePosY23Clamped . x )
) ;
LoadPreparedColorPairedRgb ( HorzR , HorzG , HorzB ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . x , iSrcSamplePosY01Clamped . y ) ,
FfxTexCoordI2 ( iSrcSamplePosX23Clamped . x , iSrcSamplePosY01Clamped . y )
) ;
LoadPreparedColorPairedRgb ( VertR , VertG , VertB ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . y , iSrcSamplePosY01Clamped . x ) ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . y , iSrcSamplePosY23Clamped . x )
) ;
/ / NOTE : duplicated data
LoadPreparedColorPairedRgb ( CenterR , CenterG , CenterB ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . y , iSrcSamplePosY01Clamped . y ) ,
FfxTexCoordI2 ( iSrcSamplePosX01Clamped . y , iSrcSamplePosY01Clamped . y )
) ;
# if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
if ( bIsInitialSample )
{
TonemapPaired ( TopCornerR , TopCornerG , TopCornerB ) ;
TonemapPaired ( BotCornerR , BotCornerG , BotCornerB ) ;
TonemapPaired ( HorzR , HorzG , HorzB ) ;
TonemapPaired ( VertR , VertG , VertB ) ;
TonemapPaired ( CenterR , CenterG , CenterB ) ;
}
# endif
RGBToYCoCgPaired ( TopCornerR , TopCornerG , TopCornerB ) ;
RGBToYCoCgPaired ( BotCornerR , BotCornerG , BotCornerB ) ;
RGBToYCoCgPaired ( HorzR , HorzG , HorzB ) ;
RGBToYCoCgPaired ( VertR , VertG , VertB ) ;
RGBToYCoCgPaired ( CenterR , CenterG , CenterB ) ;
# else
FfxFloat32x3 fSamples [ 9 ] ;
FfxFloat32x3 fSamples [ 9 ] ;
FfxInt32 iSampleIndex = 0 ;
FfxInt32 iSampleIndex = 0 ;
@ -110,6 +433,8 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F
}
}
# endif
# endif
# endif / / #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
/ / Identify how much of each upsampled color to be used for this frame
/ / Identify how much of each upsampled color to be used for this frame
const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight ( params , data ) ;
const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight ( params , data ) ;
const FfxFloat32 fKernelBiasMin = ffxMax ( 1.0f , ( ( 1.0f + fKernelBiasMax ) * 0.3f ) ) ;
const FfxFloat32 fKernelBiasMin = ffxMax ( 1.0f , ( ( 1.0f + fKernelBiasMax ) * 0.3f ) ) ;
@ -122,6 +447,139 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F
const FfxFloat32 fKernelBias = ffxLerp ( fKernelBiasMin , fKernelBiasMax , fKernelBiasWeight ) ;
const FfxFloat32 fKernelBias = ffxLerp ( fKernelBiasMin , fKernelBiasMax , fKernelBiasWeight ) ;
# if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
/ / Unroll the loop to load samples on Scarlett to help the shader compiler
const bool coordX0OnScreen = iSrcSamplePosX01 . x = = iSrcSamplePosX01Clamped . x ;
const bool coordX1OnScreen = iSrcSamplePosX01 . y = = iSrcSamplePosX01Clamped . y ;
const bool coordX2OnScreen = iSrcSamplePosX23 . x = = iSrcSamplePosX23Clamped . x ;
const bool coordY0OnScreen = iSrcSamplePosY01 . x = = iSrcSamplePosY01Clamped . x ;
const bool coordY1OnScreen = iSrcSamplePosY01 . y = = iSrcSamplePosY01Clamped . y ;
const bool coordY2OnScreen = iSrcSamplePosY23 . x = = iSrcSamplePosY23Clamped . x ;
const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2 ( fBaseSampleOffset ) ;
const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf . xx + fSampleOffsetX02 ;
const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf . yy + fSampleOffsetY02 ;
const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02 ;
const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02 ;
const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf ;
const FfxFloat32 fRectificationCurveBias = - 2.3f ;
PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox ;
pairedBox . setKernelBiasAndRectificationCurveBias ( fKernelBias , fRectificationCurveBias ) ;
/ / init by o o o
/ / o x o
/ / o o o
pairedBox . initBox (
fSrcSampleOffsetXYSq_11 . x + fSrcSampleOffsetXYSq_11 . y ,
Bool2ToFloat16x2 ( coordX1OnScreen & & coordY1OnScreen , false ) . x ,
CenterR , CenterG , CenterB
) ;
/ / add remaining two samples from 1 st row x o x
/ / o * o
/ / o o o
pairedBox . addBoxSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02 . xx ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY0OnScreen , coordX2OnScreen & & coordY0OnScreen ) ,
TopCornerR , TopCornerG , TopCornerB
) ;
/ / add two samples from 2 nd row * o *
/ / o * o
/ / x o x
pairedBox . addBoxSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02 . yy ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY2OnScreen , coordX2OnScreen & & coordY2OnScreen ) ,
BotCornerR , BotCornerG , BotCornerB
) ;
/ / add two samples from 3 rd row * o *
/ / x * x
/ / * o *
pairedBox . addBoxSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11 . yy ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY1OnScreen , coordX2OnScreen & & coordY1OnScreen ) ,
HorzR , HorzG , HorzB
) ;
/ / add remaining samples * x *
/ / * * *
/ / * x *
pairedBox . addBoxSample (
fSrcSampleOffsetXYSq_11 . xx + fSrcSampleOffsetYSq_02 ,
Bool2ToFloat16x2 ( coordX1OnScreen & & coordY0OnScreen , coordX1OnScreen & & coordY2OnScreen ) ,
VertR , VertG , VertB
) ;
FfxFloat32x2 boxCenterAndVecR , boxCenterAndVecG , boxCenterAndVecB ;
FfxFloat32 boxCenterWeight ;
pairedBox . finalizeBox ( boxCenterAndVecR , boxCenterAndVecG , boxCenterAndVecB , boxCenterWeight ) ;
if ( ! bIsInitialSample )
{
pairedBox . initUpscaledColor (
fSrcSampleOffsetXYSq_11 . x + fSrcSampleOffsetXYSq_11 . y ,
Bool2ToFloat16x2 ( coordX1OnScreen & & coordY1OnScreen , false ) . x ,
CenterR , CenterG , CenterB
) ;
/ / add remaining two samples from 1 st row x o x
/ / o * o
/ / o o o
pairedBox . addUpscaledColorSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02 . xx ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY0OnScreen , coordX2OnScreen & & coordY0OnScreen ) ,
TopCornerR , TopCornerG , TopCornerB
) ;
/ / add two samples from 2 nd row * o *
/ / o * o
/ / x o x
pairedBox . addUpscaledColorSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02 . yy ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY2OnScreen , coordX2OnScreen & & coordY2OnScreen ) ,
BotCornerR , BotCornerG , BotCornerB
) ;
/ / add two samples from 3 rd row * o *
/ / x * x
/ / * o *
pairedBox . addUpscaledColorSample (
fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11 . yy ,
Bool2ToFloat16x2 ( coordX0OnScreen & & coordY1OnScreen , coordX2OnScreen & & coordY1OnScreen ) ,
HorzR , HorzG , HorzB
) ;
/ / add remaining samples * x *
/ / * * *
/ / * x *
pairedBox . addUpscaledColorSample (
fSrcSampleOffsetXYSq_11 . xx + fSrcSampleOffsetYSq_02 ,
Bool2ToFloat16x2 ( coordX1OnScreen & & coordY0OnScreen , coordX1OnScreen & & coordY2OnScreen ) ,
VertR , VertG , VertB
) ;
FfxFloat32x4 upscaledColorAndWeight = 0.0 ;
pairedBox . finalizeUpscaledColor ( upscaledColorAndWeight ) ;
data . fUpsampledColor = FfxFloat32x3 ( upscaledColorAndWeight . rgb ) ;
data . fUpsampledWeight = FfxFloat32 ( upscaledColorAndWeight . w ) ;
}
FFX_MIN16_F2 aabbMinMaxR = Compute3x3SamplesMinMaxPaired ( CenterR , TopCornerR , BotCornerR , HorzR , VertR ) ;
FFX_MIN16_F2 aabbMinMaxG = Compute3x3SamplesMinMaxPaired ( CenterG , TopCornerG , BotCornerG , HorzG , VertG ) ;
FFX_MIN16_F2 aabbMinMaxB = Compute3x3SamplesMinMaxPaired ( CenterB , TopCornerB , BotCornerB , HorzB , VertB ) ;
data . clippingBox . boxCenter = FfxFloat32x3 ( boxCenterAndVecR . x , boxCenterAndVecG . x , boxCenterAndVecB . x ) ;
data . clippingBox . boxVec = FfxFloat32x3 ( boxCenterAndVecR . y , boxCenterAndVecG . y , boxCenterAndVecB . y ) ;
data . clippingBox . aabbMin = FfxFloat32x3 ( aabbMinMaxR . x , aabbMinMaxG . x , aabbMinMaxB . x ) ;
data . clippingBox . aabbMax = FfxFloat32x3 ( aabbMinMaxR . y , aabbMinMaxG . y , aabbMinMaxB . y ) ;
data . clippingBox . fBoxCenterWeight = FfxFloat32 ( boxCenterWeight ) ;
# else
iSampleIndex = 0 ;
iSampleIndex = 0 ;
@ -159,6 +617,8 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F
}
}
}
}
# endif / / #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS
RectificationBoxComputeVarianceBoxData ( data . clippingBox ) ;
RectificationBoxComputeVarianceBoxData ( data . clippingBox ) ;
data . fUpsampledWeight * = FfxFloat32 ( data . fUpsampledWeight > FSR3UPSCALER_EPSILON ) ;
data . fUpsampledWeight * = FfxFloat32 ( data . fUpsampledWeight > FSR3UPSCALER_EPSILON ) ;