You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

257 lines
11 KiB

#pragma kernel GeneratePointDistribution
#pragma kernel BilateralFilterSingle BILATERAL_FILTER=BilateralFilterSingle SINGLE_CHANNEL
#pragma kernel BilateralFilterColor BILATERAL_FILTER=BilateralFilterColor
#pragma kernel GatherSingle GATHER_FILTER=GatherSingle SINGLE_CHANNEL
#pragma kernel GatherColor GATHER_FILTER=GatherColor
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
// We need the stencil flag of this.
#define BILATERLAL_UNLIT
// #pragma enable_d3d11_debug_symbols
// Common includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonLighting.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Sampling/Sampling.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
// HDRP includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.cs.hlsl"
// Ray Tracing includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/BilateralFilter.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/Denoising/DenoisingUtils.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl"
// Edge length (in threads) of the square thread group used by the filter and gather kernels
#define DIFFUSE_DENOISER_TILE_SIZE 8
// Noisy input texture that the kernels below filter / upsample
TEXTURE2D_X(_DenoiseInputTexture);
// Point distribution used to pick the filter taps.
// _PointDistributionRW is written by GeneratePointDistribution, _PointDistribution is read by the bilateral filter
// (presumably the same buffer bound under both names - confirm against the host code).
RWStructuredBuffer<float2> _PointDistributionRW;
StructuredBuffer<float2> _PointDistribution;
// Filtered output texture (scalar for the single channel variant, float4 for the color variant of the denoiser)
#if SINGLE_CHANNEL
RW_TEXTURE2D_X(float, _DenoiseOutputTextureRW);
#else
RW_TEXTURE2D_X(float4, _DenoiseOutputTextureRW);
#endif
// Coordinate scale factors: .x multiplies the projected tap coordinate before reading _DenoiseInputTexture,
// .y multiplies the dispatch coordinate before tapping the bilateral data / depth. The z and w components are not used in this file.
float4 _DenoiserResolutionMultiplierVals;
// Scale applied to the distance-based filter radius (see ComputeMaxDenoisingRadius)
float _DenoiserFilterRadius;
// Forwarded to ComputeMaxReprojectionWorldRadius when evaluating the filter radius
float _PixelSpreadAngleTangent;
// When different from -1, offsets the point distribution lookup by _JitterFramePeriod * 16 entries
int _JitterFramePeriod;
// Tolerance forwarded to ComputeMaxReprojectionWorldRadius (presumably expressed in pixels - confirm)
#define PIXEL_RADIUS_TOLERANCE_THRESHOLD 2
// Flag used to do a half resolution filter (4 taps per pixel instead of 16)
int _HalfResolutionFilter;
// Fills the point distribution buffer: thread i writes entry i.
// Each entry is built from dimensions 0 and 1 of the low-discrepancy sequence at index i,
// mapped to a 2D point by SampleDiskCubic.
[numthreads(64, 1, 1)]
void GeneratePointDistribution(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    // One output point per thread
    const uint pointIndex = dispatchThreadId.x;

    // Two random numbers taken from the low-discrepancy sequence for this point
    const float u = GetLDSequenceSampleFloat(pointIndex, 0);
    const float v = GetLDSequenceSampleFloat(pointIndex, 1);

    _PointDistributionRW[pointIndex] = SampleDiskCubic(u, v);
}
// Returns the distance-based radius used to pick the filter taps around a pixel.
//   positionRWS: position of the pixel; its length is used as the distance to the pixel
//                (presumably a camera-relative world space position - confirm).
// The radius is distance * _DenoiserFilterRadius / divisor, where the divisor goes linearly from 5.0 at
// distance 0 to 50.0 at distance 500.0 and stays at 50.0 beyond that.
// These values are purely empirical: they were obtained while experimenting with various scenes.
// NOTE(review): an earlier comment described the divisor as starting at 10.0, the code uses 5.0 - confirm which is intended.
// TODO: @Anis, there are ideas to make this less empirical, left for another day.
float ComputeMaxDenoisingRadius(float3 positionRWS)
{
    // Distance to the pixel
    const float pixelDistance = length(positionRWS);

    // 0 at the origin, 1 at 500.0 units and further
    const float farBlend = saturate(pixelDistance / 500.0);

    // Divisor applied to the distance, grows as the pixel gets further away
    const float radiusDivisor = lerp(5.0, 50.0, farBlend);

    return pixelDistance * _DenoiserFilterRadius / radiusDivisor;
}
// Bilateral denoising kernel, compiled as BilateralFilterSingle (SINGLE_CHANNEL) and BilateralFilterColor.
// Each thread filters one pixel: it spreads sample points on the plane tangent to the center pixel's normal,
// projects them to the screen and accumulates the noisy input at the pixels they land on. Every tap is weighted by
// a gaussian of its distance to the center and by ComputeBilateralWeight(center, tap).
// Background and unlit pixels output 0.0 (float4(0, 0, 0, 1) for the color variant).
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void BILATERAL_FILTER(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Fetch the current pixel coordinate
    uint2 currentCoord = groupId * DIFFUSE_DENOISER_TILE_SIZE + groupThreadId;
    // Coordinate used to tap the bilateral data (position, normal, depth) of this pixel
    uint2 sourceCoord = (uint2)(currentCoord * _DenoiserResolutionMultiplierVals.y);

    // Read the central position
    const BilateralData center = TapBilateralData(sourceCoord);

    // If this is a background pixel, we are done
    if (center.z01 == 1.0 || center.isUnlit)
    {
    #if SINGLE_CHANNEL
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = 0.0;
    #else
        _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = float4(0.0, 0.0, 0.0, 1.0);
    #endif
        return;
    }

    // Create the local ortho basis for our sampling
    float3x3 localToWorld = GetLocalFrame(center.normal);

    // Initialize the accumulation values
    #if SINGLE_CHANNEL
    float colorSum = 0.0;
    #else
    float3 colorSum = 0.0;
    #endif
    float wSum = 0.0;

    // Compute the radius of the filter. This is evaluated as the max between a fixed radius value and an approximation of the footprint of the pixel
    const float denoisingRadius = ComputeMaxReprojectionWorldRadius(center.position, center.normal, _PixelSpreadAngleTangent, ComputeMaxDenoisingRadius(center.position), PIXEL_RADIUS_TOLERANCE_THRESHOLD);

    // Compute the sigma value
    const float sigma = 0.9 * denoisingRadius;

    // Index of the pixel in the 2x2 group that is used for the half res filter
    int localIndex = (currentCoord.x & 1) + (currentCoord.y & 1) * 2;

    // Define the sample count for this pixel. 16 samples per pixel if it is full res or 4 if half resolution
    const int numSamples = _HalfResolutionFilter ? 4 : 16;

    // In half resolution, each pixel of the 2x2 group uses its own window of 4 points
    int sampleOffset = (_HalfResolutionFilter != 0 ? localIndex * numSamples : 0);

    // A period of -1 means no jitter, otherwise every period index uses its own window of 16 points
    if (_JitterFramePeriod != -1)
        sampleOffset += _JitterFramePeriod * 16;

    // Loop through the samples that we need to aggregate
    for (uint sampleIndex = 0; sampleIndex < (uint)numSamples; ++sampleIndex)
    {
        // Fetch the point for the current sample and scale it to the filter radius
        float2 newSample = _PointDistribution[sampleIndex + sampleOffset] * denoisingRadius;

        // Place the point on the tangent plane of the center pixel and convert it to homogeneous clip space
        float3 wsPos = center.position + localToWorld[0] * newSample.x + localToWorld[1] * newSample.y;
        float4 hClip = TransformWorldToHClip(wsPos);
        hClip.xyz /= hClip.w;

        // Is the target pixel in the screen?
        if (hClip.x > 1.0 || hClip.x < -1.0 || hClip.y > 1.0 || hClip.y < -1.0)
            continue;

        // Convert it to screen sample space
        float2 nDC = hClip.xy * 0.5 + 0.5;
    #if UNITY_UV_STARTS_AT_TOP
        nDC.y = 1.0 - nDC.y;
    #endif

        // Tap the data for this pixel
        // Not all pixels can be fetched (only the 2x2 representative)
        // The test above accepts a point that lies exactly on the screen border, in which case nDC * _ScreenSize.xy
        // is one texel past the last row/column: clamp so the fetches below stay inside the screen.
        uint2 tapCoord = min((uint2)(nDC * _ScreenSize.xy), (uint2)_ScreenSize.xy - 1);
        uint2 lowResTapCoord = (tapCoord) * _DenoiserResolutionMultiplierVals.x;

        // Fetch the corresponding data
        const BilateralData tapData = TapBilateralData(tapCoord);

        // If the tapped pixel is a background pixel or too far from the center pixel.
        // The background test is the same z01 == 1.0 test that is applied to the center pixel. The previous
        // comparison against UNITY_RAW_FAR_CLIP_VALUE did not agree with it
        // (assumes z01 is a normalized linear depth - confirm in TapBilateralData).
        if (tapData.z01 == 1.0 || tapData.isUnlit || abs(tapData.zNF - hClip.w) > 0.1)
            continue;

        // Compute the radius of the sample
        float r = length(newSample);

        // Compute the weight (skip computation for the center)
        const float w = r > 0.001f ? gaussian(r, sigma) * ComputeBilateralWeight(center, tapData) : 1.0;

        // Accumulate the new sample
    #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, lowResTapCoord).x * w;
    #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, lowResTapCoord).xyz * w;
    #endif
        wSum += w;
    }

    // If no samples were found, we take the center pixel only
    if (wSum == 0.0)
    {
    #if SINGLE_CHANNEL
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, currentCoord).x;
    #else
        colorSum += LOAD_TEXTURE2D_X(_DenoiseInputTexture, currentCoord).xyz;
    #endif
        wSum += 1.0;
    }

    // Normalize the result
    #if SINGLE_CHANNEL
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = colorSum / wSum;
    #else
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(currentCoord)] = float4(colorSum / wSum, 1.0);
    #endif
}
// The gather kernel caches one entry per thread of its tile in group shared memory
#define GATHER_REGION_SIZE DIFFUSE_DENOISER_TILE_SIZE
// Number of entries in each cache (one per thread of the tile)
#define GATHER_REGION_SIZE_2 (GATHER_REGION_SIZE * GATHER_REGION_SIZE)
// Lighting packed with PackToR11G11B10f (only written by the color variant)
groupshared uint gs_cacheLighting[GATHER_REGION_SIZE_2];
// Single channel value (only written by the SINGLE_CHANNEL variant)
groupshared float gs_cacheLuminance[GATHER_REGION_SIZE_2];
// Depth value read from _DepthTexture for each cached pixel
groupshared float gs_cacheDepth[GATHER_REGION_SIZE_2];
// Loads the data of one pixel into the group shared caches.
//   groupIndex: slot of the caches that is written by the calling thread
//   pixelCoord: pixel to load, clamped to the screen before the texture reads
// Writes gs_cacheDepth[groupIndex], plus gs_cacheLuminance[groupIndex] (SINGLE_CHANNEL) or gs_cacheLighting[groupIndex] (color).
void FillGatherDataLDS(uint groupIndex, uint2 pixelCoord)
{
    // Keep the coordinate inside the screen
    const int clampedX = clamp(pixelCoord.x, 0, _ScreenSize.x - 1);
    const int clampedY = clamp(pixelCoord.y, 0, _ScreenSize.y - 1);
    const int2 texel = int2(clampedX, clampedY);

    // Cache the signal that will be upsampled
#ifdef SINGLE_CHANNEL
    gs_cacheLuminance[groupIndex] = LOAD_TEXTURE2D_X(_DenoiseInputTexture, texel).x;
#else
    // The color is stored packed to fit in a single uint per pixel
    const float3 inputColor = LOAD_TEXTURE2D_X(_DenoiseInputTexture, texel).xyz;
    gs_cacheLighting[groupIndex] = PackToR11G11B10f(inputColor);
#endif

    // Cache the depth, read at the coordinate scaled by the resolution multiplier
    gs_cacheDepth[groupIndex] = LOAD_TEXTURE2D_X(_DepthTexture, texel * _DenoiserResolutionMultiplierVals.y).x;
}
// Converts a position inside the tile plus an offset into an index of the group shared caches.
//   groupThreadId: 2D position inside the GATHER_REGION_SIZE x GATHER_REGION_SIZE tile
//   offset:        signed 2D offset applied to that position
// Returns the row-major index of the resulting cell. Positions that fall outside of the tile are clamped to the
// closest cell of the tile on each axis, so the returned index is always in [0, GATHER_REGION_SIZE_2 - 1].
uint OffsetToLDSAdress(uint2 groupThreadId, int2 offset)
{
    // Compute the tap coordinate in the 8x8 grid.
    // The clamp is done per axis and on signed values, before flattening: clamping the flattened index instead
    // would let an out-of-tile column end up on another row, and a negative coordinate wrap around as unsigned.
    int2 tapAddress = clamp((int2)groupThreadId + offset, 0, GATHER_REGION_SIZE - 1);
    return (uint)(tapAddress.x + tapAddress.y * GATHER_REGION_SIZE);
}
// Gather kernel, compiled as GatherSingle (SINGLE_CHANNEL) and GatherColor.
// Every thread caches the input value and depth of its own pixel in group shared memory. It then writes to its
// pixel the bilateral upsample (BilUpSingle_Uniform / BilUpColor3_Uniform) of the four cached values of the
// 2x2 pixel region it belongs to, using its own cached depth as the target depth.
[numthreads(DIFFUSE_DENOISER_TILE_SIZE, DIFFUSE_DENOISER_TILE_SIZE, 1)]
void GATHER_FILTER(uint3 centerCoord : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(centerCoord.z);

    // Every thread caches the data of its own pixel...
    FillGatherDataLDS(groupIndex, centerCoord.xy);

    // ...and waits until the whole tile is available in LDS
    GroupMemoryBarrierWithGroupSync();

    // Depth of the pixel that is being written
    const int selfSlot = OffsetToLDSAdress(groupThreadId, int2(0, 0));
    const float targetDepth = gs_cacheDepth[selfSlot];

    // Top-left pixel of the 2x2 region this pixel belongs to, first on screen then inside the tile
    const uint2 parity = uint2(centerCoord.x & 1, centerCoord.y & 1);
    const uint2 corner = centerCoord.xy - parity;
    const uint2 cornerGroupThread = corner - groupId * DIFFUSE_DENOISER_TILE_SIZE;

    // LDS slots of the four pixels of the region
    const int slot00 = OffsetToLDSAdress(cornerGroupThread, int2(0, 0));
    const int slot10 = OffsetToLDSAdress(cornerGroupThread, int2(1, 0));
    const int slot01 = OffsetToLDSAdress(cornerGroupThread, int2(0, 1));
    const int slot11 = OffsetToLDSAdress(cornerGroupThread, int2(1, 1));

    // Depths of the four pixels of the region
    const float4 lowDepths = float4(gs_cacheDepth[slot00], gs_cacheDepth[slot10], gs_cacheDepth[slot01], gs_cacheDepth[slot11]);

#if SINGLE_CHANNEL
    // Upsample the four cached scalar values
    const float4 lowValues = float4(gs_cacheLuminance[slot00], gs_cacheLuminance[slot10], gs_cacheLuminance[slot01], gs_cacheLuminance[slot11]);
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = BilUpSingle_Uniform(targetDepth, lowDepths, lowValues);
#else
    // Unpack the four cached colors and upsample them
    const float3 color00 = UnpackFromR11G11B10f(gs_cacheLighting[slot00]);
    const float3 color10 = UnpackFromR11G11B10f(gs_cacheLighting[slot10]);
    const float3 color01 = UnpackFromR11G11B10f(gs_cacheLighting[slot01]);
    const float3 color11 = UnpackFromR11G11B10f(gs_cacheLighting[slot11]);
    _DenoiseOutputTextureRW[COORD_TEXTURE2D_X(centerCoord.xy)] = float4(BilUpColor3_Uniform(targetDepth, lowDepths, color00, color10, color01, color11), 1.0);
#endif
}