You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
159 lines
6.7 KiB
159 lines
6.7 KiB
// This file is part of the FidelityFX SDK.
|
|
//
|
|
// Copyright (C) 2024 Advanced Micro Devices, Inc.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files(the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions :
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
|
|
#ifndef FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
|
|
#define FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
|
|
|
|
// Scratch arrays used by ComputeSCDHistogramsDivergence. Each one has 256
// entries and is indexed with the calling thread's iLocalIndex.
// NOTE(review): this presumably means one entry per thread of a 256-thread
// workgroup - confirm against the shader entry point that dispatches it.

// Histogram bins exactly as returned by LoadRwSCDHistogram.
FFX_GROUPSHARED FfxFloat32   sourceHistogram[256];

// Bins after the 11-tap filter and the per-row bin shift; divided by their
// sum later in the function.
FFX_GROUPSHARED FfxFloat32   filteredHistogram[256];

// Working copy of filteredHistogram that is summed in place (the total ends
// up in element 0).
FFX_GROUPSHARED FfxFloat32   tempBuffer[256];

// Two-component working buffer for summing the pair of log-ratio terms in
// place.
FFX_GROUPSHARED FfxFloat32x2 tempBuffer2[256];
|
|
|
|
// Computes one scene-change-detection (SCD) divergence term per workgroup and,
// in the workgroup that finishes last, publishes the combined result.
//
// Per thread (one histogram bin each):
//   1. Load the bin at iGlobalId.x and smooth it with a symmetric 11-tap
//      filter (indices clamped to [0, 255]), then add a bias of 1.0.
//   2. Write the smoothed value shifted by -1, 0 or +1 bins for
//      iGlobalId.y == 0, 1 or 2; the bin vacated by the shift is set to 1.0.
//   3. Normalise the 256 shifted bins by their sum.
//   4. Compare against the value from LoadRwSCDPreviousHistogram using the
//      two terms  c * log(c / p)  and  p * log(p / c),  summed over all bins.
//   5. Thread 0 converts the sums to 1 - exp(-(|a| + |b|)), scales the result
//      to an integer and hands it to AtomicIncrementSCDTemp for row
//      iGlobalId.y. When the value returned by AtomicIncrementSCDOutput
//      equals WhereToStop, it takes the minimum of rows 0..2, updates the
//      history bits and resets the counters.
//   6. Rows with iGlobalId.y == 1 store the normalised bin as the new
//      "previous" value and write 0 to the source histogram bin.
//
// Parameters:
//   iGlobalId   - .x selects the histogram bin to load/store, .y selects the
//                 shift variant (0, 1 or 2) and the SCDTemp slot.
//   iLocalIndex - index into the 256-entry shared arrays.
//   iLocalId, iGroupId, iGroupSize - not used by this function.
//
// All FFX_GROUP_MEMORY_BARRIER statements are at function scope, outside any
// thread-dependent branch.
// NOTE(review): FFX_GROUP_MEMORY_BARRIER is assumed to be a workgroup-wide
// execution and shared-memory barrier - confirm against its definition.
void ComputeSCDHistogramsDivergence(FfxInt32x3 iGlobalId, FfxInt32x2 iLocalId, FfxInt32 iLocalIndex, FfxInt32x2 iGroupId, FfxInt32x2 iGroupSize)
{
    // Fixed-point scale used to carry the float result through integer atomics.
    FFX_STATIC const FfxFloat32 Factor = 1000000.0;
    // Counter value (26) at which the finalisation branch below runs.
    // NOTE(review): presumably "total workgroups - 1" (3 rows x 9 histograms) -
    // confirm against the dispatch size.
    FFX_STATIC const FfxInt32 WhereToStop = 3*9 - 1;
    FFX_STATIC const FfxInt32 HistogramCount = 3 * 3;

    // First half (including the centre tap) of a symmetric 11-tap filter; the
    // full set of 11 weights sums to approximately 1.
    FFX_STATIC const FfxFloat32 Kernel[] = {
        0.0088122291, 0.027143577, 0.065114059, 0.12164907, 0.17699835, 0.20056541
    };

    sourceHistogram[iLocalIndex] = FfxFloat32(LoadRwSCDHistogram(iGlobalId.x));
    FFX_GROUP_MEMORY_BARRIER;

    // The filter is centred on iLocalIndex, so the first tap is 5 bins to the left.
    const FfxInt32 kernelShift = -5;
    const FfxInt32 indexToRead = iLocalIndex + kernelShift;

    FfxFloat32 val = 0.0;
    val += Kernel[0] * sourceHistogram[ffxClamp(indexToRead +  0, 0, 255)];
    val += Kernel[1] * sourceHistogram[ffxClamp(indexToRead +  1, 0, 255)];
    val += Kernel[2] * sourceHistogram[ffxClamp(indexToRead +  2, 0, 255)];
    val += Kernel[3] * sourceHistogram[ffxClamp(indexToRead +  3, 0, 255)];
    val += Kernel[4] * sourceHistogram[ffxClamp(indexToRead +  4, 0, 255)];
    val += Kernel[5] * sourceHistogram[ffxClamp(indexToRead +  5, 0, 255)];
    val += Kernel[4] * sourceHistogram[ffxClamp(indexToRead +  6, 0, 255)];
    val += Kernel[3] * sourceHistogram[ffxClamp(indexToRead +  7, 0, 255)];
    val += Kernel[2] * sourceHistogram[ffxClamp(indexToRead +  8, 0, 255)];
    val += Kernel[1] * sourceHistogram[ffxClamp(indexToRead +  9, 0, 255)];
    val += Kernel[0] * sourceHistogram[ffxClamp(indexToRead + 10, 0, 255)];

    // Bias every bin so the normalised value is strictly positive and the
    // current-frame operand of the log ratios below is never zero.
    val += 1.0;

    if (iGlobalId.y == 0)
    {
        // Shift one bin to the left; the vacated last bin gets the bias value.
        if (iLocalIndex == 0)
            filteredHistogram[255] = 1.0;
        else
            filteredHistogram[iLocalIndex - 1] = val;
    }
    else if (iGlobalId.y == 1)
    {
        // Unshifted.
        filteredHistogram[iLocalIndex] = val;
    }
    else if (iGlobalId.y == 2)
    {
        // Shift one bin to the right; the vacated first bin gets the bias value.
        if (iLocalIndex == 255)
            filteredHistogram[0] = 1.0;
        else
            filteredHistogram[iLocalIndex + 1] = val;
    }
    FFX_GROUP_MEMORY_BARRIER;

    // Sum all 256 filtered bins into tempBuffer[0]. Every step is followed by
    // a barrier so the reduction does not depend on the hardware wave size or
    // on threads of a wave executing in lockstep.
    tempBuffer[iLocalIndex] = filteredHistogram[iLocalIndex];
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 128) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 128]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 64) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 64]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 32) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 32]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 16) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 16]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 8) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 8]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 4) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 4]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 2) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 2]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 1) { tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 1]; }
    FFX_GROUP_MEMORY_BARRIER;

    // Normalise so the 256 bins sum to 1. Each thread only touches its own bin.
    filteredHistogram[iLocalIndex] /= tempBuffer[0];

    FfxFloat32 currentFilteredHistogramsValue = filteredHistogram[iLocalIndex];
    FfxFloat32 previousHistogramsValue = LoadRwSCDPreviousHistogram(iGlobalId.x);

    // Per-bin log-ratio terms in both directions (current vs. previous and
    // previous vs. current).
    // NOTE(review): if the previous-histogram buffer can hold 0 (e.g. before
    // the first write-back below), these expressions divide by zero / take
    // log(0) - confirm how that buffer is initialised.
    tempBuffer2[iLocalIndex] = FfxFloat32x2(
        currentFilteredHistogramsValue * log(currentFilteredHistogramsValue / previousHistogramsValue),
        previousHistogramsValue * log(previousHistogramsValue / currentFilteredHistogramsValue)
    );
    FFX_GROUP_MEMORY_BARRIER;

    // Sum both components over all 256 bins, again with a barrier after every
    // step. The reduction stops at two partial sums; thread 0 adds them below.
    if (iLocalIndex < 128) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 128]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 64) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 64]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 32) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 32]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 16) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 16]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 8) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 8]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 4) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 4]; }
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex < 2) { tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 2]; }
    // Thread 0 reads tempBuffer2[1], which thread 1 wrote in the step above.
    FFX_GROUP_MEMORY_BARRIER;

    if (iLocalIndex == 0)
    {
        FfxFloat32x2 sum = tempBuffer2[0] + tempBuffer2[1];

        // Map the combined magnitude from [0, inf) to [0, 1).
        FfxFloat32 resFloat = 1 - exp(-(abs(sum.x) + abs(sum.y)));
        // Divide by HistogramCount and convert to fixed point for the integer atomic.
        FfxUInt32 resUInt = FfxUInt32((resFloat / FfxFloat32(HistogramCount)) * Factor);
        AtomicIncrementSCDTemp(iGlobalId.y, resUInt);

        // NOTE(review): assumed to return the counter value from before the
        // increment, as the variable name suggests - confirm against the helper.
        FfxUInt32 oldFinishedGroupCount = AtomicIncrementSCDOutput(SCD_OUTPUT_COMPLETED_WORKGROUPS_SLOT, 1);
        if (oldFinishedGroupCount == WhereToStop)
        {
            // Finalisation: combine the three per-row accumulators.
            FfxUInt32 res0 = LoadRwSCDTemp(0);
            FfxUInt32 res1 = LoadRwSCDTemp(1);
            FfxUInt32 res2 = LoadRwSCDTemp(2);
            // Keep the smallest of the three shift variants and undo the
            // fixed-point scaling.
            FfxFloat32 sceneChangeValue = ffxMin(res0, ffxMin(res1, res2)) / Factor;

            // Shift the history left by one and record this result in bit 0.
            FfxUInt32 history = LoadRwSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT) << 1;
            if (CrossedSceneChangeThreshold(sceneChangeValue))
            {
                history |= 1;
            }
            StoreSCDOutput(SCD_OUTPUT_SCENE_CHANGE_SLOT, ffxAsUInt32(sceneChangeValue));
            StoreSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT, history);
            // Reset the counter and the accumulators.
            StoreSCDOutput(SCD_OUTPUT_COMPLETED_WORKGROUPS_SLOT, 0);

            ResetSCDTemp();
        }
    }

    if (iGlobalId.y == 1)
    {
        // Only the unshifted row writes back: the normalised bin becomes the
        // new "previous" value and the source bin is set to 0.
        // NOTE(review): rows 0 and 2 load the same iGlobalId.x bins at the top
        // of this function and no cross-workgroup ordering is visible here -
        // confirm that those loads cannot observe these stores.
        StoreSCDPreviousHistogram(iGlobalId.x, currentFilteredHistogramsValue);

        StoreSCDHistogram(iGlobalId.x, 0);
    }
}
|
|
|
|
#endif // FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
|