You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

149 lines
4.2 KiB

// This implementation is adapted from BuildProbabilityTables.compute
#define COMPUTE_PATH_TRACING_SKY_SAMPLING_DATA
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/ImageBasedLighting.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/PathTracing/Shaders/PathTracingSkySampling.hlsl"
#define KERNEL_WIDTH 64
// Make sky data access a little less verbose
#define CDF _PathTracingSkyCDFTexture
#define MARGINAL _PathTracingSkyMarginalTexture
#define TEX_WIDTH _PathTracingSkyTextureWidth
#define TEX_HEIGHT _PathTracingSkyTextureHeight
// Performs a block-level parallel scan, in place, over row 'j' of 'buf'.
// Ref: GPU Gems 3, Chapter 39: "Parallel Prefix Sum (Scan) with CUDA".
//
// The scan runs as an up-sweep (partial sums accumulated towards element w - 1)
// followed by a down-sweep that turns the row into an exclusive scan.
//
// Parameters:
//   i         - index of the calling thread (callers in this file pass dispatchThreadId.x).
//   j         - row of 'buf' to scan; used as the second texel coordinate.
//   w         - number of elements in the row.
//               NOTE(review): the doubling/halving offsets look like they assume w is a
//               power of two - confirm against the code that sizes the textures.
//   iterCount - number of elements each thread visits per pass, strided by KERNEL_WIDTH
//               (callers in this file pass w / KERNEL_WIDTH).
//   buf       - read/write texture holding the values; overwritten with the scan result.
//   sum       - out: value of element w - 1 after the up-sweep (the row total),
//               clamped to at least FLT_MIN so callers can divide by it.
//
// NOTE(review): synchronization relies on AllMemoryBarrierWithGroupSync(), so presumably
// a whole row must be processed by a single thread group - confirm with the dispatch code.
void ParallelScan(uint i, uint j, uint w, uint iterCount, RW_TEXTURE2D(float, buf), out float sum)
{
uint offset;
// Execute the up-sweep phase: the stride doubles on every pass.
for (offset = 1; offset <= w / 2; offset *= 2)
{
// Wait for the writes of the previous pass (or of the caller) before reading them.
AllMemoryBarrierWithGroupSync();
for (uint iter = 0; iter < iterCount; iter++)
{
uint idx = i + iter * KERNEL_WIDTH;
// a1 = (2 * idx + 1) * offset - 1
uint a1 = Mad24(Mad24(2u, idx, 1u), offset, -1);
uint a2 = a1 + offset;
// Indices past the end of the row have no work to do in this pass.
if (a2 < w)
{
buf[uint2(a2, j)] += buf[uint2(a1, j)];
}
}
}
AllMemoryBarrierWithGroupSync();
// The last element now holds the row total. Clamp it to
// prevent NaNs arising from the division of 0 by 0.
sum = max(buf[uint2(w - 1, j)], FLT_MIN);
// Every thread must have read the total before thread 0 clears it below.
AllMemoryBarrierWithGroupSync();
// The exclusive scan requires the last element to be 0.
if (i == 0)
{
buf[uint2(w - 1, j)] = 0.0;
}
// Execute the down-sweep phase: the stride halves on every pass.
for (offset = w / 2; offset > 0; offset /= 2)
{
AllMemoryBarrierWithGroupSync();
for (uint iter = 0; iter < iterCount; iter++)
{
uint idx = i + iter * KERNEL_WIDTH;
// a1 = (2 * idx + 1) * offset - 1
uint a1 = Mad24(Mad24(2u, idx, 1u), offset, -1);
uint a2 = a1 + offset;
if (a2 < w)
{
// The left element takes the right element's value, and the right element
// accumulates the old left value.
float t1 = buf[uint2(a1, j)];
buf[uint2(a1, j)] = buf[uint2(a2, j)];
buf[uint2(a2, j)] += t1;
}
}
}
// Make the final scan values visible to the whole group before returning.
AllMemoryBarrierWithGroupSync();
}
#pragma kernel ComputeCDF
// Builds one row of the sky CDF texture and the matching (not yet normalized)
// entry of the marginal texture.
//
// dispatchThreadId.x selects the thread within the row, dispatchThreadId.y selects the row.
// Each thread handles TEX_WIDTH / KERNEL_WIDTH texels of the row, strided by KERNEL_WIDTH.
[numthreads(KERNEL_WIDTH, 1, 1)]
void ComputeCDF(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint lane = dispatchThreadId.x;
    const uint row = dispatchThreadId.y;
    const uint texelsPerThread = TEX_WIDTH / KERNEL_WIDTH;

    // Vertical texture coordinate at the center of this row.
    const float v = (row + 0.5) / TEX_HEIGHT;

    // Step 1: fill the row with the luminance of the sky in each texel's direction.
    // No need for a sinTheta term in the PDF when using equiareal mapping.
    uint pass;
    for (pass = 0; pass < texelsPerThread; pass++)
    {
        const uint column = lane + pass * KERNEL_WIDTH;
        const float u = (column + 0.5) / TEX_WIDTH;
        const float3 skyDir = MapUVToSkyDirection(u, v);

        CDF[uint2(column, row)] = Luminance(SampleSkyTexture(skyDir, 0.0, 0).rgb);
    }

    // Step 2: scan the row in place; rowTotal receives the (clamped) sum of the row.
    float rowTotal;
    ParallelScan(lane, row, TEX_WIDTH, texelsPerThread, CDF, rowTotal);

    // Step 3: normalize the scanned row by its total.
    for (pass = 0; pass < texelsPerThread; pass++)
    {
        const uint2 texel = uint2(lane + pass * KERNEL_WIDTH, row);
        CDF[texel] = CDF[texel] / rowTotal;
    }

    // Step 4: a single thread records the row's average value for the marginal pass.
    if (lane == 0)
    {
        MARGINAL[uint2(row, 0)] = rowTotal / TEX_WIDTH;
    }
}
#pragma kernel ComputeMarginal
// Turns the per-row values stored in row 0 of the marginal texture into a normalized
// scan, then stores the PDF normalization factor in its first texel.
//
// Each thread handles TEX_HEIGHT / KERNEL_WIDTH entries, strided by KERNEL_WIDTH.
[numthreads(KERNEL_WIDTH, 1, 1)]
void ComputeMarginal(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint lane = dispatchThreadId.x;
    const uint entriesPerThread = TEX_HEIGHT / KERNEL_WIDTH;

    // Scan the per-row values in place; total receives their (clamped) sum.
    float total;
    ParallelScan(lane, 0, TEX_HEIGHT, entriesPerThread, MARGINAL, total);

    // Normalize the scanned values by the total.
    for (uint pass = 0; pass < entriesPerThread; pass++)
    {
        const uint2 entry = uint2(lane + pass * KERNEL_WIDTH, 0);
        MARGINAL[entry] = MARGINAL[entry] / total;
    }

    if (lane == 0)
    {
        // Average value over the whole image.
        const float imageMean = total / TEX_HEIGHT;

        // Normalization is 1 / (4 * PI * mean), or 0 when the mean is not positive.
        float pdfNormalization = 0.0;
        if (imageMean > 0.0)
        {
            pdfNormalization = rcp(imageMean) * 0.25 * INV_PI;
        }

        // Overwrites the first entry of the scan written just above by this same thread
        // (presumably always zero for an exclusive scan, so nothing is lost).
        MARGINAL[uint2(0, 0)] = pdfNormalization;
    }
}