You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
146 lines
4.3 KiB
146 lines
4.3 KiB
|
|
// Kernel declarations. Each line names an entry point and sets the macros
// used when compiling it. VFX_BATCH_COUNT / VFX_BUILD_PREFIX_SUM supply the
// names of the two generic kernel functions defined further down, and
// PREFIX_SUM_THREAD_COUNT supplies their thread-group width.

// Single-instance preparation kernel.
#pragma kernel CSVFXPrepareSingleInstance PREFIX_SUM_THREAD_COUNT=1
|
|
// Batch count summation, 1024 threads per group.
#pragma kernel CSVFXBatchSumCount VFX_BATCH_COUNT=CSVFXBatchSumCount PREFIX_SUM_THREAD_COUNT=1024
|
|
// Batch count summation, 128 threads per group.
#pragma kernel CSVFXBatchSumCount_128 VFX_BATCH_COUNT=CSVFXBatchSumCount_128 PREFIX_SUM_THREAD_COUNT=128
|
|
// Prefix sum with instancing enabled, 1024 threads per group.
#pragma kernel CSVFXBuildPrefixSum VFX_BUILD_PREFIX_SUM=CSVFXBuildPrefixSum VFX_USE_INSTANCING=1 PREFIX_SUM_THREAD_COUNT=1024
|
|
// Prefix sum with instancing enabled, 128 threads per group.
#pragma kernel CSVFXBuildPrefixSum_128 VFX_BUILD_PREFIX_SUM=CSVFXBuildPrefixSum_128 VFX_USE_INSTANCING=1 PREFIX_SUM_THREAD_COUNT=128
|
|
|
|
// Graphics APIs this shader is compiled for.
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch switch2 glcore gles3 webgpu
|
|
|
|
#include "HLSLSupport.cginc"
|
|
|
|
// Per-dispatch constants shared by every kernel in this file.
CBUFFER_START(Uniform)
|
|
// Destination offset in uint elements. It is shifted left by 2 to form the
// byte address passed to dstBuffer.Store, and added to the entry index when
// the prefix sum is written to dstStructBuffer.
uint dstOffset;
|
|
// Base element index in dstStructBuffer from which the prefix-sum kernel reads
// (and, with instancing, resets) the per-entry counts.
// Note: CSVFXPrepareSingleInstance declares a local with the same name fixed
// to 0, which hides this value inside that kernel.
uint srcOffset;
|
|
// Number of entries to process; indices >= size are skipped by the kernels.
uint size;
|
|
// Not referenced directly in this file.
// NOTE(review): presumably consumed by VFXInstancing.hlsl - confirm.
float4 instancingConstants;
|
|
// Not referenced directly in this file.
// NOTE(review): presumably consumed by VFXInstancing.hlsl - confirm.
uint2 instancingBufferOffsets;
|
|
CBUFFER_END
|
|
|
|
// Instancing support is only pulled in for kernel variants compiled with
// VFX_USE_INSTANCING=1 (the CSVFXBuildPrefixSum kernels). The three switches
// below are defined before the include so the header sees them.
// NOTE(review): their exact effect is defined by VFXInstancing.hlsl, which is
// not visible here - confirm against that header.
#if VFX_USE_INSTANCING
|
|
#define VFX_INSTANCING_ACTIVE_INDIRECTION 1
|
|
#define VFX_INSTANCING_BATCH_INDIRECTION 1
|
|
#define VFX_INSTANCING_FIXED_SIZE 1
|
|
#include "VFXInstancing.hlsl"
|
|
#endif
|
|
|
|
// Maximum number of entries handled by one thread group; also the length of
// the groupshared prefixSum array.
#define PREFIX_SUM_MAX_SIZE 1024
|
|
// Number of consecutive entries each thread processes, so that a group of
// PREFIX_SUM_THREAD_COUNT threads covers PREFIX_SUM_MAX_SIZE entries
// (1 entry per thread at 1024 threads, 8 entries per thread at 128 threads).
#define PREFIX_SUM_PER_THREAD (PREFIX_SUM_MAX_SIZE / PREFIX_SUM_THREAD_COUNT)
|
|
|
|
// Per-entry values summed by the batch count kernel (read-only).
StructuredBuffer<uint> srcStructBufferUint;
|
|
// Raw buffer receiving a single count at byte address (dstOffset << 2).
RWByteAddressBuffer dstBuffer;
|
|
// Read/write uint buffer: source of the counts for the single-instance and
// prefix-sum kernels, and destination of the prefix-sum results.
RWStructuredBuffer<uint> dstStructBuffer;
|
|
|
|
// Single-instance path: moves the event count stored in the first slot of
// dstStructBuffer to the indirect buffer and to the prefix-sum slot, then
// clears the original counter. Only the first thread of the dispatch works.
[numthreads(1, 1, 1)]
void CSVFXPrepareSingleInstance(uint3 threadId : SV_DispatchThreadID)
{
    // Every thread other than the very first one has nothing to do.
    if (threadId.x >= 1u)
        return;

    // The count always lives at the beginning of the buffer; the prefix-sum
    // entry sits two slots further (after count and total count).
    const uint countSlot = 0u;
    const uint prefixSumSlot = countSlot + 2;

    const uint eventCount = dstStructBuffer[countSlot];

    // Publish the count to the indirect buffer (dstOffset is in uints,
    // Store expects a byte address).
    dstBuffer.Store(dstOffset << 2, eventCount);

    // With a single instance the prefix sum is just the count itself.
    dstStructBuffer[prefixSumSlot] = eventCount;

    // Clear the event counter for the next round.
    dstStructBuffer[countSlot] = 0u;
}
|
|
|
|
// Group-wide accumulator for the batch total.
groupshared uint batchSumCount = 0u;

// Sums the first `size` entries of srcStructBufferUint and stores the total
// in dstBuffer at byte address (dstOffset << 2).
// Each thread handles PREFIX_SUM_PER_THREAD consecutive entries.
// NOTE(review): thread selection uses SV_DispatchThreadID, so this looks like
// it expects to be dispatched as a single thread group - confirm with caller.
[numthreads(PREFIX_SUM_THREAD_COUNT, 1, 1)]
void VFX_BATCH_COUNT(uint3 threadId : SV_DispatchThreadID)
{
    const bool isFirstThread = (threadId.x == 0);
    const uint firstEntry = threadId.x * PREFIX_SUM_PER_THREAD;

    // Explicitly zero the accumulator (should not be required).
    if (isFirstThread)
    {
        batchSumCount = 0;
    }

    // Nobody may accumulate before the reset is visible.
    GroupMemoryBarrierWithGroupSync();

    // Add every in-range entry owned by this thread to the shared total.
    [unroll]
    for (uint slot = 0; slot < PREFIX_SUM_PER_THREAD; ++slot)
    {
        const uint entry = firstEntry + slot;
        if (entry < size)
        {
            InterlockedAdd(batchSumCount, srcStructBufferUint[entry]);
        }
    }

    // All contributions must have landed before the total is read.
    GroupMemoryBarrierWithGroupSync();

    // A single thread publishes the result.
    if (isFirstThread)
    {
        dstBuffer.Store(dstOffset << 2, batchSumCount);
    }
}
|
|
|
|
// Group-shared scratch array in which the scan is computed.
groupshared uint prefixSum[PREFIX_SUM_MAX_SIZE];

// Builds an inclusive prefix sum over `size` counts read from dstStructBuffer
// (starting at srcOffset) and writes the result back to dstStructBuffer
// starting at dstOffset. With instancing enabled, the source slot of each
// entry is resolved through VFXInitInstancing and the source counts are reset
// to zero afterwards. Each thread owns PREFIX_SUM_PER_THREAD consecutive entries.
[numthreads(PREFIX_SUM_THREAD_COUNT, 1, 1)]
void VFX_BUILD_PREFIX_SUM(uint3 threadId : SV_DispatchThreadID)
{
    const uint firstEntry = threadId.x * PREFIX_SUM_PER_THREAD;

    // Phase 1: load the counts into shared memory.
    [unroll]
    for (uint loadSlot = 0; loadSlot < PREFIX_SUM_PER_THREAD; ++loadSlot)
    {
        uint index = firstEntry + loadSlot;
        if (index < size)
        {
#if VFX_USE_INSTANCING
            // Resolve which instance this entry refers to.
            uint instanceIndex, instanceActiveIndex, instanceCurrentIndex;
            VFXInitInstancing(index, instanceIndex, instanceActiveIndex, instanceCurrentIndex);
            uint srcIndex = instanceIndex;
#else
            uint srcIndex = index;
#endif
            prefixSum[index] = dstStructBuffer[srcIndex + srcOffset];
        }
    }

    // Phase 2: Sklansky scan. At each step, entries whose `stride` bit is set
    // add the last entry of the preceding block (same upper bits, `stride`
    // bit cleared, all lower bits set). Readers and writers of a step are
    // therefore disjoint.
    for (uint stride = 1; stride < size; stride <<= 1)
    {
        // Results of the previous step (or of the load) must be visible.
        GroupMemoryBarrierWithGroupSync();

        const uint blockMask = ~(stride - 1);

        [unroll]
        for (uint scanSlot = 0; scanSlot < PREFIX_SUM_PER_THREAD; ++scanSlot)
        {
            const uint index = firstEntry + scanSlot;
            if ((index & stride) != 0)
            {
                const uint carryFrom = (index & blockMask) - 1;
                prefixSum[index] += prefixSum[carryFrom];
            }
        }
    }

    // The final step must be complete before results are written out.
    GroupMemoryBarrierWithGroupSync();

    // Phase 3: write the results back to the buffer.
    [unroll]
    for (uint storeSlot = 0; storeSlot < PREFIX_SUM_PER_THREAD; ++storeSlot)
    {
        const uint index = firstEntry + storeSlot;

#if VFX_USE_INSTANCING
        // Reset the event count of every instance in the batch.
        if (index < instancingBatchSize)
        {
            dstStructBuffer[index + srcOffset] = 0u;
        }
#endif

        if (index < size)
        {
            dstStructBuffer[index + dstOffset] = prefixSum[index];
        }
    }
}
|