You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
123 lines
5.6 KiB
123 lines
5.6 KiB
// From "Fast Fourier Transform for Image Processing in DirectX* 11" by Joseph S.
|
|
//--------------------------------------------------------------------------------------
|
|
// Copyright 2014 Intel Corporation
|
|
// All Rights Reserved
|
|
//
|
|
// Permission is granted to use, copy, distribute and prepare derivative works of this
|
|
// software for any purpose and without fee, provided, that the above copyright notice
|
|
// and this statement appear in all copies. Intel makes no representations about the
|
|
// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS."
|
|
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
|
|
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
|
|
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
|
|
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not
|
|
// assume any responsibility for any errors which may appear in this software nor any
|
|
// responsibility to update it.
|
|
//--------------------------------------------------------------------------------------
|
|
|
|
// Generic Graphics includes
|
|
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
|
|
|
|
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
|
|
|
|
// The set of possible kernels
|
|
#pragma kernel RowPassTi_256 FFT_PASS_TI=RowPassTi_256 FFT_RESOLUTION=256 BUTTERFLY_COUNT=8 COMPENSATION_FACTOR=1.0
|
|
#pragma kernel ColPassTi_256 FFT_PASS_TI=ColPassTi_256 COLUMN_PASS FFT_RESOLUTION=256 BUTTERFLY_COUNT=8 COMPENSATION_FACTOR=1.0
|
|
#pragma kernel RowPassTi_128 FFT_PASS_TI=RowPassTi_128 FFT_RESOLUTION=128 BUTTERFLY_COUNT=7 COMPENSATION_FACTOR=0.5
|
|
#pragma kernel ColPassTi_128 FFT_PASS_TI=ColPassTi_128 COLUMN_PASS FFT_RESOLUTION=128 BUTTERFLY_COUNT=7 COMPENSATION_FACTOR=0.5
|
|
#pragma kernel RowPassTi_64 FFT_PASS_TI=RowPassTi_64 FFT_RESOLUTION=64 BUTTERFLY_COUNT=6 COMPENSATION_FACTOR=0.25
|
|
#pragma kernel ColPassTi_64 FFT_PASS_TI=ColPassTi_64 COLUMN_PASS FFT_RESOLUTION=64 BUTTERFLY_COUNT=6 COMPENSATION_FACTOR=0.25
|
|
|
|
// Input and output textures.
// The kernel below reads .xyz from _FFTRealBuffer / _FFTImaginaryBuffer (one texel per
// thread, array slice taken from the dispatch thread id's z) and writes its result to the
// *RW pair with w set to 0.0. Column passes write only _FFTRealBufferRW.
|
|
RWTexture2DArray<float4> _FFTRealBuffer;
|
|
RWTexture2DArray<float4> _FFTImaginaryBuffer;
|
|
RWTexture2DArray<float4> _FFTRealBufferRW;
|
|
RWTexture2DArray<float4> _FFTImaginaryBufferRW;
|
|
|
|
// SLM used to avoid having multiple dispatches per dimension
// Four lines of FFT_RESOLUTION float3 values: rows 0 and 1 receive the loaded real and
// imaginary line; every butterfly pass then reads one row pair ((0,1) or (2,3)) and
// writes the other pair, swapping roles after each pass.
|
|
groupshared float3 pingPongArray[4][FFT_RESOLUTION];
|
|
|
|
// Reverses the bit order of each 32-bit component of `input` (bit 0 <-> bit 31, etc.).
// Done by hand because, per the original author, the alternative (presumably the
// reversebits() intrinsic) breaks on Vulkan.
uint2 reversebits_uint2(const uint2 input)
{
    uint2 v = input;

    // Exchange progressively wider neighbours: single bits, bit pairs, nibbles, bytes.
    // Shifting first and masking afterwards is equivalent to mask-then-shift for
    // unsigned values.
    v = ((v >> 1) & 0x55555555) | ((v << 1) & 0xaaaaaaaa);
    v = ((v >> 2) & 0x33333333) | ((v << 2) & 0xcccccccc);
    v = ((v >> 4) & 0x0f0f0f0f) | ((v << 4) & 0xf0f0f0f0);
    v = ((v >> 8) & 0x00ff00ff) | ((v << 8) & 0xff00ff00);

    // Finally exchange the two 16-bit halves.
    v = (v << 16) | (v >> 16);
    return v;
}
|
|
|
|
// Computes, for element `x` of butterfly pass `passIndex`, the two source indices to
// combine and the complex weight to apply.
//   passIndex : zero-based butterfly pass; the section width is 2 << passIndex.
//   x         : position of the element along the line being transformed.
//   indices   : (out) the two source positions, halfSectionWidth apart inside x's section.
//               For pass 0 they are additionally bit-reversed over BUTTERFLY_COUNT bits.
//   weights   : (out) x = cos(angle), y = -sin(angle), with
//               angle = TWO_PI * (x mod sectionWidth) / sectionWidth.
void GetButterflyValues(uint passIndex, uint x, out uint2 indices, out float2 weights)
{
    const uint width = 2 << passIndex;
    const uint halfWidth = width >> 1;
    const uint widthMask = width - 1;

    // Split x into the start of its section and its offsets inside the section.
    const uint sectionBase = x & ~widthMask;
    const uint offsetInSection = x & widthMask;
    const uint offsetInHalf = x & (halfWidth - 1);

    // Weight: cosine in x, negated sine in y.
    const float angle = TWO_PI * offsetInSection / (float)width;
    float sinAngle;
    float cosAngle;
    sincos(angle, sinAngle, cosAngle);
    weights = float2(cosAngle, -sinAngle);

    // The two inputs sit half a section apart.
    const uint firstIndex = sectionBase + offsetInHalf;
    indices = uint2(firstIndex, firstIndex + halfWidth);

    // The first pass reads its inputs in bit-reversed order: keep the top BUTTERFLY_COUNT
    // bits of the 32-bit reversal and clamp them to the line length.
    if (passIndex == 0)
    {
        indices = (reversebits_uint2(indices) >> (32 - BUTTERFLY_COUNT)) & (FFT_RESOLUTION - 1);
    }
}
|
|
|
|
// Evaluates one butterfly for element `x` of pass `passIndex`, reading from groupshared
// memory.
//   t0, t1  : rows of pingPongArray holding the real and imaginary inputs respectively.
//   resultR : (out) real part of the combined value.
//   resultI : (out) imaginary part of the combined value.
void ButterflyPass(int passIndex, uint x, uint t0, uint t1, out float3 resultR, out float3 resultI)
{
    uint2 sourceIndex;
    float2 weight;
    GetButterflyValues(passIndex, x, sourceIndex, weight);

    // First operand (taken as-is) and second operand (scaled by the weight).
    const float3 realA = pingPongArray[t0][sourceIndex.x];
    const float3 imagA = pingPongArray[t1][sourceIndex.x];
    const float3 realB = pingPongArray[t0][sourceIndex.y];
    const float3 imagB = pingPongArray[t1][sourceIndex.y];

    resultR = realA + weight.x * realB + weight.y * imagB;
    resultI = imagA - weight.y * realB + weight.x * imagB;
}
|
|
|
|
// Kernel entry point: each thread group transforms one full row (or, with COLUMN_PASS
// defined, one full column) of one array slice, with one thread per element.
// All butterfly passes except the last run in groupshared memory; the last one writes
// straight to the output textures.
[numthreads(FFT_RESOLUTION, 1, 1)]
void FFT_PASS_TI(uint3 id : SV_DispatchThreadID)
{
    // id.x is this thread's position along the line being transformed.
    const uint lineIndex = id.x;

    // Column passes swap x and y so that id.x walks down a column.
#ifdef COLUMN_PASS
    const uint2 texel = id.yx;
#else
    const uint2 texel = id.xy;
#endif
    const int3 coord = int3(texel, id.z);

    // Load the whole row or column into the scratch array (row 0 = real, row 1 = imaginary).
    pingPongArray[0][lineIndex] = _FFTRealBuffer[coord].xyz;
    pingPongArray[1][lineIndex] = _FFTImaginaryBuffer[coord].xyz;

    // (real, imaginary) scratch rows read by the current pass and written by it.
    uint2 srcRows = uint2(0, 1);
    uint2 dstRows = uint2(2, 3);

    for (int passIdx = 0; passIdx < BUTTERFLY_COUNT - 1; passIdx++)
    {
        // Make the previous writes to the source rows visible to the whole group.
        GroupMemoryBarrierWithGroupSync();

        float3 passReal;
        float3 passImag;
        ButterflyPass(passIdx, lineIndex, srcRows.x, srcRows.y, passReal, passImag);
        pingPongArray[dstRows.x][lineIndex] = passReal;
        pingPongArray[dstRows.y][lineIndex] = passImag;

        // Ping-pong: what was just written becomes the next pass's input.
        const uint2 consumedRows = srcRows;
        srcRows = dstRows;
        dstRows = consumedRows;
    }

    // The final butterfly reads the last scratch rows and writes directly to the target
    // texture, so sync once more before reading.
    GroupMemoryBarrierWithGroupSync();

    float3 realValue = float3(0.0, 0.0, 0.0);
    float3 imaginaryValue = float3(0.0, 0.0, 0.0);
    ButterflyPass(BUTTERFLY_COUNT - 1, lineIndex, srcRows.x, srcRows.y, realValue, imaginaryValue);

#if defined(COLUMN_PASS)
    // Column pass: flip the sign on texels where (id.x + id.y) is odd and store only the
    // real part. Only the sign is applied here; no scale factor.
    const float checkerSign = (((id.x + id.y) & 0x01) != 0) ? -1.0 : 1.0;
    _FFTRealBufferRW[coord] = float4(realValue * checkerSign, 0.0);
#else
    // Row pass: store both parts for the following pass.
    _FFTRealBufferRW[coord] = float4(realValue, 0.0);
    _FFTImaginaryBufferRW[coord] = float4(imaginaryValue, 0.0);
#endif
}
|