You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

270 lines
11 KiB

// This file is part of the FidelityFX SDK.
//
// Copyright (C) 2024 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
FfxFloat32 ComputeDisocclusions(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthViewSpace)
{
const FfxFloat32 fNearestDepthInMeters = ffxMin(fCurrentDepthViewSpace * ViewSpaceToMetersFactor(), FSR3UPSCALER_FP16_MAX);
const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters);
fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold);
const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
FfxFloat32 fDisocclusion = 0.0f;
FfxFloat32 fWeightSum = 0.0f;
FfxBoolean bPotentialDisocclusion = true;
for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4 && bPotentialDisocclusion; iSampleIndex++)
{
const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
const FfxInt32x2 iSamplePos = ClampLoad(bilinearInfo.iBasePos, iOffset, FfxInt32x2(RenderSize()));
if (IsOnScreen(iSamplePos, RenderSize())) {
const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(LoadReconstructedPrevDepth(iSamplePos));
const FfxFloat32 fDepthDifference = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
bPotentialDisocclusion = bPotentialDisocclusion && (fDepthDifference > FSR3UPSCALER_FP32_MIN);
if (bPotentialDisocclusion) {
const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()) * 0.5f);
const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
const FfxFloat32 Ksep = 1.37e-05f;
const FfxFloat32 fRequiredDepthSeparation = Ksep * fHalfViewportWidth * fDepthThreshold;
fDisocclusion += ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDifference)) * fWeight;
fWeightSum += fWeight;
}
}
}
}
fDisocclusion = (bPotentialDisocclusion && fWeightSum > 0) ? ffxSaturate(1.0f - fDisocclusion / fWeightSum) : 0.0f;
return fDisocclusion;
}
FfxFloat32 ComputeMotionDivergence(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthSample)
{
const FfxInt32x2 iPxReprojectedPos = FfxInt32x2((fUv + fMotionVector) * RenderSize());
const FfxFloat32 fReprojectedDepth = LoadDilatedDepth(iPxReprojectedPos);
const FfxFloat32x2 fReprojectedMotionVector = LoadDilatedMotionVector(iPxReprojectedPos);
const FfxFloat32 fReprojectedVelocity = Get4KVelocity(fReprojectedMotionVector);
const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector);
const FfxFloat32 fMaxLen = max(length(fMotionVector), length(fReprojectedMotionVector));
const FfxFloat32 fNucleusDepthInMeters = GetViewSpaceDepthInMeters(fReprojectedDepth);
const FfxFloat32 fCurrentDepthInMeters = GetViewSpaceDepthInMeters(fCurrentDepthSample);
const FfxFloat32 fDistanceFactor = MinDividedByMax(fNucleusDepthInMeters, fCurrentDepthInMeters);
const FfxFloat32 fVelocityFactor = ffxSaturate(f4KVelocity / 10.0f);
const FfxFloat32 fMotionVectorFieldConfidence = (1.0f - ffxSaturate(fReprojectedVelocity / f4KVelocity)) * fDistanceFactor * fVelocityFactor;
return fMotionVectorFieldConfidence;
}
FfxFloat32 DilateReactiveMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv)
{
FfxFloat32 fDilatedReactiveMasks = 0.0f;
FFX_UNROLL
for (FfxInt32 y = -1; y <=1; y++)
{
FFX_UNROLL
for (FfxInt32 x = -1; x <= 1; x++)
{
const FfxInt32x2 sampleCoord = ClampLoad(iPxPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
fDilatedReactiveMasks = ffxMax(fDilatedReactiveMasks, LoadReactiveMask(sampleCoord));
}
}
return fDilatedReactiveMasks;
}
FfxFloat32 DilateTransparencyAndCompositionMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv)
{
const FfxFloat32x2 fUvTransparencyAndCompositionMask = ClampUv(fUv, RenderSize(), GetTransparencyAndCompositionMaskResourceDimensions());
return SampleTransparencyAndCompositionMask(fUvTransparencyAndCompositionMask);
}
FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 iPxPos)
{
/*
1 2 3
4 0 5
6 7 8
*/
const FfxInt32 iNucleusIndex = 0;
const FfxInt32 iSampleCount = 9;
const FfxInt32x2 iSampleOffsets[iSampleCount] = {
FfxInt32x2(+0, +0),
FfxInt32x2(-1, -1),
FfxInt32x2(+0, -1),
FfxInt32x2(+1, -1),
FfxInt32x2(-1, +0),
FfxInt32x2(+1, +0),
FfxInt32x2(-1, +1),
FfxInt32x2(+0, +1),
FfxInt32x2(+1, +1),
};
FfxFloat32 fSamples[iSampleCount];
FfxFloat32 fLumaMin = FSR3UPSCALER_FP32_MAX;
FfxFloat32 fLumaMax = FSR3UPSCALER_FP32_MIN;
FFX_UNROLL
for (FfxInt32 iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
const FfxInt32x2 iPxSamplePos = ClampLoad(iPxPos, iSampleOffsets[iSampleIndex], FfxInt32x2(RenderSize()));
fSamples[iSampleIndex] = LoadCurrentLuma(iPxSamplePos) * Exposure();
fLumaMin = ffxMin(fLumaMin, fSamples[iSampleIndex]);
fLumaMax = ffxMax(fLumaMax, fSamples[iSampleIndex]);
}
const FfxFloat32 fThreshold = 0.9f;
FfxFloat32 fDissimilarLumaMin = FSR3UPSCALER_FP32_MAX;
FfxFloat32 fDissimilarLumaMax = 0;
#define SETBIT(x) (1U << x)
FfxUInt32 uPatternMask = SETBIT(iNucleusIndex); // Flag nucleus as similar
const FfxUInt32 uNumRejectionMasks = 4;
const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(iNucleusIndex), // Upper left
SETBIT(2) | SETBIT(3) | SETBIT(5) | SETBIT(iNucleusIndex), // Upper right
SETBIT(4) | SETBIT(6) | SETBIT(7) | SETBIT(iNucleusIndex), // Lower left
SETBIT(5) | SETBIT(7) | SETBIT(8) | SETBIT(iNucleusIndex) // Lower right
};
FfxInt32 iBitIndex = 1;
FFX_UNROLL
for (FfxInt32 iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex, ++iBitIndex) {
const FfxFloat32 fDifference = abs(fSamples[iSampleIndex] - fSamples[iNucleusIndex]) / (fLumaMax - fLumaMin);
if (fDifference < fThreshold)
{
uPatternMask |= SETBIT(iBitIndex);
}
else
{
fDissimilarLumaMin = ffxMin(fDissimilarLumaMin, fSamples[iSampleIndex]);
fDissimilarLumaMax = ffxMax(fDissimilarLumaMax, fSamples[iSampleIndex]);
}
}
const FfxBoolean bIsRidge = fSamples[iNucleusIndex] > fDissimilarLumaMax || fSamples[iNucleusIndex] < fDissimilarLumaMin;
if (FFX_FALSE == bIsRidge)
{
return 0.0f;
}
FFX_UNROLL
for (FfxInt32 i = 0; i < uNumRejectionMasks; i++)
{
if ((uPatternMask & uRejectionMasks[i]) == uRejectionMasks[i])
{
return 0.0f;
}
}
return 1.0f - fLumaMin / fLumaMax;
}
FfxFloat32 UpdateAccumulation(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fDisocclusion, FfxFloat32 fShadingChange)
{
const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
FfxFloat32 fAccumulation = 0.0f;
if (IsUvInside(fReprojectedUv)) {
const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize());
fAccumulation = ffxSaturate(SampleAccumulation(fReprojectedUv_HW));
}
fAccumulation = ffxLerp(fAccumulation, 0.0f, fShadingChange);
fAccumulation = ffxLerp(fAccumulation, ffxMin(fAccumulation, 0.25f), fDisocclusion);
fAccumulation *= FfxFloat32(round(fAccumulation * 100.0f) > 1.0f);
// Update for next frame, normalize to store in unorm
const FfxFloat32 fAccumulatedFramesMax = 3.0f;
const FfxFloat32 fAccumulatedFramesToStore = ffxSaturate(fAccumulation + (1.0f / fAccumulatedFramesMax));
StoreAccumulation(iPxPos, fAccumulatedFramesToStore);
return fAccumulation;
}
FfxFloat32 ComputeShadingChange(FfxFloat32x2 fUv)
{
// NOTE: Here we re-apply jitter, will be reverted again when sampled in accumulation pass
const FfxFloat32x2 fShadingChangeUv = ClampUv(fUv - Jitter() / RenderSize(), ShadingChangeRenderSize(), ShadingChangeMaxRenderSize());
const FfxFloat32 fShadingChange = ffxSaturate(SampleShadingChange(fShadingChangeUv));
return fShadingChange;
}
void PrepareReactivity(FfxInt32x2 iPxPos)
{
const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize();
const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
// Discard small mvs
const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector);
const FfxFloat32x2 fDilatedUv = fUv + fMotionVector;
const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
const FfxFloat32 fDepthInMeters = GetViewSpaceDepthInMeters(fDilatedDepth);
const FfxFloat32 fDisocclusion = ComputeDisocclusions(fUv, fMotionVector, GetViewSpaceDepth(fDilatedDepth));
const FfxFloat32 fShadingChange = ffxMax(DilateReactiveMasks(iPxPos, fUv), ComputeShadingChange(fUv));
const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(fUv, fMotionVector, fDilatedDepth);
const FfxFloat32 fDilatedTransparencyAndComposition = DilateTransparencyAndCompositionMasks(iPxPos, fUv);
const FfxFloat32 fFinalReactiveness = ffxMax(fMotionDivergence, fDilatedTransparencyAndComposition);
const FfxFloat32 fAccumulation = UpdateAccumulation(iPxPos, fUv, fMotionVector, fDisocclusion, fShadingChange);
FfxFloat32x4 fOutput;
fOutput[REACTIVE] = fFinalReactiveness;
fOutput[DISOCCLUSION] = fDisocclusion;
fOutput[SHADING_CHANGE] = fShadingChange;
fOutput[ACCUMULAION] = fAccumulation;
StoreDilatedReactiveMasks(iPxPos, fOutput);
const FfxFloat32 fLockStrength = ComputeThinFeatureConfidence(iPxPos);
if (fLockStrength > (1.0f / 100.0f))
{
StoreNewLocks(ComputeHrPosFromLrPos(FfxInt32x2(iPxPos)), fLockStrength);
}
}