// Diffuse denoiser compute kernels:
//  - GeneratePointDistribution: fills the sample point buffer used by the bilateral filter.
//  - BilateralFilterSingle / BilateralFilterColor: world-space bilateral filter of the noisy input.
//  - GatherSingle / GatherColor: depth-aware combination of each 2x2 pixel region of the input.
#pragma kernel GeneratePointDistribution

#pragma kernel BilateralFilterSingle BILATERAL_FILTER=BilateralFilterSingle SINGLE_CHANNEL
#pragma kernel BilateralFilterColor BILATERAL_FILTER=BilateralFilterColor

#pragma kernel GatherSingle GATHER_FILTER=GatherSingle SINGLE_CHANNEL
#pragma kernel GatherColor GATHER_FILTER=GatherColor

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

// We need the stencil flag of this.
// NOTE(review): the spelling of this macro is kept as-is on purpose; it is defined before the
// bilateral filter include below and is presumably tested there under this exact name.
#define BILATERLAL_UNLIT

// #pragma enable_d3d11_debug_symbols

// Common includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonLighting.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Sampling/Sampling.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"

// HDRP includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.cs.hlsl"

// Ray Tracing includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/BilateralFilter.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DenoisingUtils.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl"

// Tile size of this compute shaders
#define DIFFUSE_DENOISER_TILE_SIZE 8

// Noisy Input Buffer
TEXTURE2D_X(_DenoiseInputTexture);

// Buffer used for point sampling. Elements are 2D points: they are written from SampleDiskCubic
// and read back as float2 offsets that get scaled by the filter radius.
RWStructuredBuffer<float2> _PointDistributionRW;
StructuredBuffer<float2> _PointDistribution;

// Filtered Output buffer (depends on the single or color variant of the denoiser)
#if SINGLE_CHANNEL
RW_TEXTURE2D_X(float, _DenoiseOutputTextureRW);
#else
RW_TEXTURE2D_X(float4, _DenoiseOutputTextureRW);
#endif

// Resolution multipliers:
//   .x scales a screen-space tap coordinate to a coordinate in _DenoiseInputTexture,
//   .y scales a dispatch coordinate to the coordinate used to tap the bilateral data / depth.
float4 _DenoiserResolutionMultiplierVals;
// Radius of the filter (world space)
float _DenoiserFilterRadius;
// Forwarded to ComputeMaxReprojectionWorldRadius when evaluating the filter radius
float _PixelSpreadAngleTangent;
// When different from -1, offsets the sample window in the point distribution by 16 * this value
int _JitterFramePeriod;
#define PIXEL_RADIUS_TOLERANCE_THRESHOLD 2

// Flag used to do a half resolution filter
int _HalfResolutionFilter;

// Writes one point of the sampling distribution per thread: the low-discrepancy sequence
// dimensions 0 and 1 for index dispatchThreadId.x are mapped through SampleDiskCubic.
// No bounds check is done here; the dispatch size has to match the buffer size.
[numthreads(64, 1, 1)]
void GeneratePointDistribution(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    const uint pointIndex = dispatchThreadId.x;
    _PointDistributionRW[pointIndex] = SampleDiskCubic(GetLDSequenceSampleFloat(pointIndex, 0), GetLDSequenceSampleFloat(pointIndex, 1));
}

// Returns the distance-dependent world space radius used as input for the filter radius.
//   positionRWS: position of the point; its length is used as the distance to the point.
float ComputeMaxDenoisingRadius(float3 positionRWS)
{
    // Compute the distance to the pixel
    float distanceToPoint = length(positionRWS);

    // This is purely empirical, values were obtained while experimenting with various scenes and these values give good visual results.
    // The radius is distance * _DenoiserFilterRadius divided by a factor that grows linearly from 5.0 (at distance 0.0)
    // to 50.0 (at distance 500.0) and stays at 50.0 beyond that.
    // TODO: @Anis, I have a bunch of idea how to make this better and less empirical but it's for any other day
    return distanceToPoint * _DenoiserFilterRadius / lerp(5.0, 50.0, saturate(distanceToPoint / 500.0));
}

// Bilateral filter kernel (compiled as BilateralFilterSingle or BilateralFilterColor).
// For every pixel, sample points are placed in the plane defined by the pixel normal, projected
// back to the screen, and the input texture values found there are blended with a gaussian and
// bilateral weight. Background and unlit pixels output 0 (single) or (0, 0, 0, 1) (color).
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void BILATERAL_FILTER(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Fetch the current pixel coordinate
    uint2 currentCoord = groupId * DIFFUSE_DENOISER_TILE_SIZE + groupThreadId;
    uint2 sourceCoord = (uint2)(currentCoord * _DenoiserResolutionMultiplierVals.y);

    // Read the central position
    const BilateralData center = TapBilateralData(sourceCoord);

    // If this is a background pixel, we are done
    if (center.z01 == 1.0 || center.isUnlit)
    {
    #if SINGLE_CHANNEL
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = 0.0;
    #else
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = float4(0.0, 0.0, 0.0, 1.0);
    #endif
        return;
    }

    // Create the local ortho basis for our sampling
    float3x3 localToWorld = GetLocalFrame(center.normal);

    // Initialize the accumulation values
    #if SINGLE_CHANNEL
    float colorSum = 0.0;
    float wSum = 0.0;
    #else
    float3 colorSum = 0.0;
    float wSum = 0.0;
    #endif

    // Compute the radius of the filter. This is evaluated as the max between a fixed radius value and an approximation of the footprint of the pixel
    const float denoisingRadius = ComputeMaxReprojectionWorldRadius(center.position, center.normal, _PixelSpreadAngleTangent, ComputeMaxDenoisingRadius(center.position), PIXEL_RADIUS_TOLERANCE_THRESHOLD);

    // Compute the sigma value
    const float sigma = 0.9 * denoisingRadius;

    // Index of the pixel in the 2x2 group that are used for the half res filter
    int localIndex = (currentCoord.x & 1) + (currentCoord.y & 1) * 2;

    // Define the sample count for this pixel. 16 samples per pixels if it is a full res or 4 if half resolution
    const int numSamples = _HalfResolutionFilter ? 4 : 16;
    // In half resolution, each pixel of the 2x2 group uses its own window of 4 points
    int sampleOffset = (_HalfResolutionFilter != 0 ? localIndex * numSamples : 0);
    if (_JitterFramePeriod != -1)
        sampleOffset += _JitterFramePeriod * 16;

    // Last valid pixel of the screen, used to keep the taps inside the screen
    const uint2 lastScreenPixel = (uint2)(_ScreenSize.xy) - uint2(1, 1);

    // Loop through the samples that we need to aggregate
    for (uint sampleIndex = 0; sampleIndex < (uint)numSamples; ++sampleIndex)
    {
        // Fetch the noise value for the current sample
        float2 newSample = _PointDistribution[sampleIndex + sampleOffset] * denoisingRadius;

        // Convert the point to homogeneous clip space
        float3 wsPos = center.position + localToWorld[0] * newSample.x + localToWorld[1] * newSample.y;
        float4 hClip = TransformWorldToHClip(wsPos);
        hClip.xyz /= hClip.w;

        // Is the target pixel in the screen?
        if (hClip.x > 1.0 || hClip.x < -1.0 || hClip.y > 1.0 || hClip.y < -1.0)
            continue;

        // Convert it to screen sample space
        float2 nDC = hClip.xy * 0.5 + 0.5;
    #if UNITY_UV_STARTS_AT_TOP
        nDC.y = 1.0 - nDC.y;
    #endif

        // Tap the data for this pixel
        // Not all pixels can be fetched (only the 2x2 representative)
        // The test above accepts nDC == 1.0, which maps exactly onto the screen size; clamp so
        // that the tap never lands one pixel outside of the screen.
        uint2 tapCoord = min((uint2)(nDC * _ScreenSize.xy), lastScreenPixel);
        uint2 lowResTapCoord = (tapCoord) * _DenoiserResolutionMultiplierVals.x;

        // Fetch the corresponding data
        const BilateralData tapData = TapBilateralData(tapCoord);

        // If the tapped pixel is a background pixel or too far from the center pixel
        // NOTE(review): the center pixel is tested against z01 == 1.0 while taps are tested against
        // UNITY_RAW_FAR_CLIP_VALUE; confirm which convention z01 follows and align the two tests.
        if (tapData.z01 == UNITY_RAW_FAR_CLIP_VALUE || tapData.isUnlit || abs(tapData.zNF - hClip.w) > 0.1)
            continue;

        // Compute the radius of the sample
        float r = length(newSample);

        // Compute the weight (skip computation for the center)
        const float w = r > 0.001f ? gaussian(r, sigma) * ComputeBilateralWeight(center, tapData) : 1.0;

        // Accumulate the new sample
    #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, lowResTapCoord).x * w;
    #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, lowResTapCoord).xyz * w;
    #endif
        wSum += w;
    }

    // If no samples were found, we take the center pixel only
    if (wSum == 0.0)
    {
    #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, currentCoord).x;
    #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, currentCoord).xyz;
    #endif
        wSum += 1.0;
    }

    // Normalize the result
    #if SINGLE_CHANNEL
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = colorSum / wSum;
    #else
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = float4(colorSum / wSum, 1.0);
    #endif
}

#define GATHER_REGION_SIZE DIFFUSE_DENOISER_TILE_SIZE
#define GATHER_REGION_SIZE_2 (GATHER_REGION_SIZE * GATHER_REGION_SIZE)

// Group shared caches for the gather kernels, one entry per thread of the tile.
// gs_cacheLighting holds the color packed as R11G11B10 (color variant),
// gs_cacheLuminance holds the single channel value (single channel variant).
groupshared uint gs_cacheLighting[GATHER_REGION_SIZE_2];
groupshared float gs_cacheLuminance[GATHER_REGION_SIZE_2];
groupshared float gs_cacheDepth[GATHER_REGION_SIZE_2];

// Loads the input value and the depth of one pixel into the group shared caches.
//   groupIndex: slot of the caches to write to.
//   pixelCoord: pixel to load; it is clamped to the screen before the loads.
void FillGatherDataLDS(uint groupIndex, uint2 pixelCoord)
{
    int2 sampleCoord = int2(clamp(pixelCoord.x, 0, _ScreenSize.x - 1), clamp(pixelCoord.y, 0, _ScreenSize.y - 1));

#if SINGLE_CHANNEL
    gs_cacheLuminance[groupIndex] = LOAD_TEXTURE2D_X(_DenoiseInputTexture, sampleCoord).x;
#else
    float3 lighting = LOAD_TEXTURE2D_X(_DenoiseInputTexture, sampleCoord).xyz;
    gs_cacheLighting[groupIndex] = PackToR11G11B10f(lighting);
#endif

    // The depth is fetched at the coordinate scaled by the .y resolution multiplier
    float depthValue = LOAD_TEXTURE2D_X(_DepthTexture, sampleCoord * _DenoiserResolutionMultiplierVals.y).x;
    gs_cacheDepth[groupIndex] = depthValue;
}

// Converts a thread position inside the tile plus an offset into an index of the group shared caches.
//   groupThreadId: position inside the tile.
//   offset: signed offset applied to that position.
// Returns an index in [0, GATHER_REGION_SIZE_2 - 1].
uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset)
{
    // Compute the tap coordinate in the 8x8 grid.
    // The clamp is done per component so that a tap leaving the tile sticks to the tile edge
    // instead of wrapping into the neighbouring row.
    const int2 lastCell = int2(GATHER_REGION_SIZE - 1, GATHER_REGION_SIZE - 1);
    int2 tapAddress = clamp((int2)(groupThreadId) + offset, int2(0, 0), lastCell);
    return (uint)(tapAddress.x + tapAddress.y * GATHER_REGION_SIZE);
}

// Gather kernel (compiled as GatherSingle or GatherColor).
// Every thread caches its pixel in group shared memory, then combines the four cached values of
// the 2x2 region it belongs to, using the pixel depth and the four cached depths as guides.
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void GATHER_FILTER(uint3 centerCoord : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(centerCoord.z);

    // Fill color and lighting to the LDS
    FillGatherDataLDS(groupIndex, centerCoord.xy);

    // Make sure all values are loaded in LDS by now.
    GroupMemoryBarrierWithGroupSync();

    // Read the high res depth
    int outputIdx = OffsetToLDSAdress(groupThreadId, int2(0, 0));
    float targetDepth = gs_cacheDepth[outputIdx];

    // Compute the corner of the 2x2 pixel region (the tile size is even, so the region never crosses a tile)
    uint2 corner = centerCoord.xy - uint2(centerCoord.x & 1, centerCoord.y & 1);
    uint2 cornerGroupThread = corner - groupId * DIFFUSE_DENOISER_TILE_SIZE;

    // Grab the indices of the sub-region to use
    int ldsIdx0 = OffsetToLDSAdress(cornerGroupThread, int2(0, 0));
    int ldsIdx1 = OffsetToLDSAdress(cornerGroupThread, int2(1, 0));
    int ldsIdx2 = OffsetToLDSAdress(cornerGroupThread, int2(0, 1));
    int ldsIdx3 = OffsetToLDSAdress(cornerGroupThread, int2(1, 1));

    float4 lowDepths = float4(gs_cacheDepth[ldsIdx0], gs_cacheDepth[ldsIdx1], gs_cacheDepth[ldsIdx2], gs_cacheDepth[ldsIdx3]);

#if SINGLE_CHANNEL
    float value = BilUpSingle_Uniform(targetDepth, lowDepths, float4(gs_cacheLuminance[ldsIdx0], gs_cacheLuminance[ldsIdx1], gs_cacheLuminance[ldsIdx2], gs_cacheLuminance[ldsIdx3]));
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = value;
#else
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = float4(BilUpColor3_Uniform(targetDepth, lowDepths,
        UnpackFromR11G11B10f(gs_cacheLighting[ldsIdx0]),
        UnpackFromR11G11B10f(gs_cacheLighting[ldsIdx1]),
        UnpackFromR11G11B10f(gs_cacheLighting[ldsIdx2]),
        UnpackFromR11G11B10f(gs_cacheLighting[ldsIdx3])), 1.0);
#endif
}