// This implementation is adapted from BuildProbabilityTables.compute

#define COMPUTE_PATH_TRACING_SKY_SAMPLING_DATA

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch switch2

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/ImageBasedLighting.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/PathTracing/Shaders/PathTracingSkySampling.hlsl"

#define KERNEL_WIDTH 64

// Make sky data access a little less verbose
#define CDF _PathTracingSkyCDFTexture
#define MARGINAL _PathTracingSkyMarginalTexture
#define TEX_WIDTH _PathTracingSkyTextureWidth
#define TEX_HEIGHT _PathTracingSkyTextureHeight

// Performs a block-level parallel scan over row 'j' of 'buf', in place.
// Ref: GPU Gems 3, Chapter 39: "Parallel Prefix Sum (Scan) with CUDA".
//
//   i         - index of the calling thread along x.
//   j         - row of 'buf' that is scanned.
//   w         - number of elements in the row.
//   iterCount - number of elements each thread handles per pass; callers pass w / KERNEL_WIDTH.
//   buf       - texture holding the values on entry and the scanned values on exit.
//   sum       - receives the value found in the last element after the up-sweep,
//               clamped from below to FLT_MIN.
void ParallelScan(uint i, uint j, uint w, uint iterCount, RW_TEXTURE2D(float, buf), out float sum)
{
    const uint lastIndex = w - 1;
    const uint halfWidth = w / 2;

    uint stride;

    // Up-sweep phase: accumulate partial sums with a stride that doubles every pass.
    for (stride = 1; stride <= halfWidth; stride *= 2)
    {
        // Writes of the previous pass (or of the caller) must be visible before reading.
        AllMemoryBarrierWithGroupSync();

        for (uint k = 0; k < iterCount; k++)
        {
            uint lane = i + k * KERNEL_WIDTH;

            // src = (2 * lane + 1) * stride - 1
            uint src = Mad24(Mad24(2u, lane, 1u), stride, -1);
            uint dst = src + stride;

            if (dst < w)
            {
                float contribution = buf[uint2(src, j)];
                buf[uint2(dst, j)] = buf[uint2(dst, j)] + contribution;
            }
        }
    }

    AllMemoryBarrierWithGroupSync();

    // Clamp the total so that a later division of 0 by 0 cannot produce NaNs.
    sum = max(buf[uint2(lastIndex, j)], FLT_MIN);

    // Every thread must have read the total before it gets overwritten below.
    AllMemoryBarrierWithGroupSync();

    // The exclusive scan requires the last element to be 0.
    if (i == 0)
    {
        buf[uint2(lastIndex, j)] = 0.0;
    }

    // Down-sweep phase: walk back with a stride that halves every pass.
    for (stride = halfWidth; stride > 0; stride /= 2)
    {
        AllMemoryBarrierWithGroupSync();

        for (uint k = 0; k < iterCount; k++)
        {
            uint lane = i + k * KERNEL_WIDTH;

            // src = (2 * lane + 1) * stride - 1
            uint src = Mad24(Mad24(2u, lane, 1u), stride, -1);
            uint dst = src + stride;

            if (dst < w)
            {
                // src and dst always differ here (stride > 0), so both can be read up front.
                float lower = buf[uint2(src, j)];
                float upper = buf[uint2(dst, j)];

                buf[uint2(src, j)] = upper;
                buf[uint2(dst, j)] = upper + lower;
            }
        }
    }

    AllMemoryBarrierWithGroupSync();
}

#pragma kernel ComputeCDF

// Fills row 'dispatchThreadId.y' of CDF with sky luminance, scans it in place, divides it by
// the row total, and stores the row average (total / TEX_WIDTH) in MARGINAL at (row, 0).
[numthreads(KERNEL_WIDTH, 1, 1)]
void ComputeCDF(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint i = dispatchThreadId.x;
    const uint j = dispatchThreadId.y;
    const uint iterCount = TEX_WIDTH / KERNEL_WIDTH;

    uint k;

    float v = (j + 0.5) / TEX_HEIGHT;

    // Step 1: write the luminance seen through each texel center of this row.
    // No need for a sinTheta term in the PDF when using equiareal mapping.
    for (k = 0; k < iterCount; k++)
    {
        uint column = i + k * KERNEL_WIDTH;
        float u = (column + 0.5) / TEX_WIDTH;

        float3 dir = MapUVToSkyDirection(u, v);
        float3 skyColor = SampleSkyTexture(dir, 0.0, 0).rgb;

        CDF[uint2(column, j)] = Luminance(skyColor);
    }

    // Step 2: scan the row in place and fetch its (clamped) total.
    float rowValSum;
    ParallelScan(i, j, TEX_WIDTH, iterCount, CDF, rowValSum);

    // Step 3: divide every scanned value by the row total.
    for (k = 0; k < iterCount; k++)
    {
        uint2 texel = uint2(i + k * KERNEL_WIDTH, j);
        CDF[texel] = CDF[texel] / rowValSum;
    }

    // Step 4: a single thread publishes the row average for the marginal pass.
    if (i == 0)
    {
        MARGINAL[uint2(j, 0)] = rowValSum / TEX_WIDTH;
    }
}

#pragma kernel ComputeMarginal

// Scans row 0 of MARGINAL in place, divides it by its total, then overwrites texel (0, 0)
// with a PDF normalization factor derived from the average of the values.
[numthreads(KERNEL_WIDTH, 1, 1)]
void ComputeMarginal(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint i = dispatchThreadId.x;
    const uint iterCount = TEX_HEIGHT / KERNEL_WIDTH;

    float rowValSum;
    ParallelScan(i, 0, TEX_HEIGHT, iterCount, MARGINAL, rowValSum);

    for (uint k = 0; k < iterCount; k++)
    {
        uint2 texel = uint2(i + k * KERNEL_WIDTH, 0);
        MARGINAL[texel] = MARGINAL[texel] / rowValSum;
    }

    if (i == 0)
    {
        float imgIntegralValue = rowValSum / TEX_HEIGHT;

        // Fall back to 0 when the average is not strictly positive.
        float pdfNormalization = 0.0;
        if (imgIntegralValue > 0.0)
        {
            pdfNormalization = rcp(imgIntegralValue) * 0.25 * INV_PI;
        }

        // NOTE(review): texel (0, 0) is reused to carry the normalization factor; presumably
        // the scanned value stored there is not needed by the sampling code - confirm.
        MARGINAL[uint2(0, 0)] = pdfNormalization;
    }
}