You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

435 lines
18 KiB

// Ray direction generation kernels (one per tracing resolution)
#pragma kernel RaytracingIndirectDiffuseHalfRes
#pragma kernel RaytracingIndirectDiffuseFullRes
// Spatial filtering / upscale kernels (one per tracing resolution)
#pragma kernel IndirectDiffuseIntegrationUpscaleHalfRes
#pragma kernel IndirectDiffuseIntegrationUpscaleFullRes
// Only compile these kernels for the listed graphics APIs
#pragma only_renderers d3d11 xboxseries ps5
// Include and define the shader pass
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/ShaderPass/ShaderPass.cs.hlsl"
#define SHADERPASS SHADERPASS_RAYTRACING
// HDRP generic includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Material.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceLighting.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStencilUsage.cs.hlsl"
// Raytracing includes
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
// Uncomment to make the upscale kernels fetch their neighbor taps straight from the textures
// (the *_NOLDS helpers) instead of going through the groupshared cache
// #define WITHOUT_LDS
// Tile size of this compute (every kernel of this file uses TILE_SIZE x TILE_SIZE threads per group)
#define RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE 8
// Depth buffer, read by all the kernels of this file
TEXTURE2D_X(_DepthTexture);
// Stencil buffer, used by the ray generation kernels to detect unlit pixels
TYPED_TEXTURE2D_X(uint2, _StencilTexture);
// Output of the ray generation kernels: xyz = sampled direction, w = 1 / cosine of the sample
// (w is set to -1.0 for the pixels where no direction was generated)
RW_TEXTURE2D_X(float4, _RaytracingDirectionBuffer);
// Generates one cosine-distributed ray direction per half resolution pixel.
// Each half res pixel is mapped to a full res pixel (ComputeSourceCoordinates) that provides the depth,
// stencil and normal, and that is also the location written in _RaytracingDirectionBuffer:
//   xyz = sampled world space direction, w = 1 / dot(direction, normal).
// Background and unlit pixels keep the invalid marker (w = -1.0).
[numthreads(RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void RaytracingIndirectDiffuseHalfRes(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Compute the half res pixel position to process
    uint2 halfResCoord = groupId * RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE + groupThreadId;

    // Pixel coordinate in full res of the pixel that we will be using for our computation
    uint2 sourceCoord = ComputeSourceCoordinates(halfResCoord, _RayTracingCheckerIndex);

    // Initialize the buffer with invalid values
    _RaytracingDirectionBuffer[COORD_TEXTURE2D_X(sourceCoord)] = float4(0.0, 0.0, 0.0, -1.0f);

    // Read the depth and stencil values
    float depthValue = LOAD_TEXTURE2D_X(_DepthTexture, sourceCoord).r;
    uint stencilValue = GetStencilValue(LOAD_TEXTURE2D_X(_StencilTexture, sourceCoord));

    // This point is part of the background or is unlit, no ray is needed
    if (depthValue == UNITY_RAW_FAR_CLIP_VALUE || (stencilValue & STENCILUSAGE_IS_UNLIT) != 0)
        return;

    // Decode the world space normal
    // (the position and view vector previously computed here were never used and have been removed)
    NormalData normalData;
    DecodeFromNormalBuffer(sourceCoord, normalData);

    // Generate the new sample (following values of the sequence)
    float2 newSample;
    newSample.x = GetBNDSequenceSample(halfResCoord, _RaytracingFrameIndex, 0);
    newSample.y = GetBNDSequenceSample(halfResCoord, _RaytracingFrameIndex, 1);

    // Importance sample with a cosine lobe
    float3 sampleDir = SampleHemisphereCosine(newSample.x, newSample.y, normalData.normalWS);

    // The cosine between the sampled direction and the normal is used as the PDF
    float samplePDF = dot(sampleDir, normalData.normalWS);

    // Store the sampled direction and the inverse PDF of the sample
    // NOTE(review): samplePDF is not guarded against zero, a direction perpendicular to the normal
    // would store an infinite inverse PDF - confirm how the consumer of this buffer handles it
    _RaytracingDirectionBuffer[COORD_TEXTURE2D_X(sourceCoord)] = float4(sampleDir, 1.0 / samplePDF);
}
// Generates one cosine-distributed ray direction per full resolution pixel.
// The result is written in _RaytracingDirectionBuffer at the pixel coordinate:
//   xyz = sampled world space direction, w = 1 / dot(direction, normal).
// Background and unlit pixels keep the invalid marker (w = -1.0).
[numthreads(RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void RaytracingIndirectDiffuseFullRes(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Compute the pixel position to process
    uint2 currentCoord = groupId * RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE + groupThreadId;

    // Initialize the buffer with invalid values
    _RaytracingDirectionBuffer[COORD_TEXTURE2D_X(currentCoord)] = float4(0.0, 0.0, 0.0, -1.0f);

    // Read the depth and stencil values
    float depthValue = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord).r;
    uint stencilValue = GetStencilValue(LOAD_TEXTURE2D_X(_StencilTexture, currentCoord));

    // This point is part of the background or is unlit, no ray is needed
    if (depthValue == UNITY_RAW_FAR_CLIP_VALUE || (stencilValue & STENCILUSAGE_IS_UNLIT) != 0)
        return;

    // Decode the world space normal
    // (the position and view vector previously computed here were never used and have been removed)
    NormalData normalData;
    DecodeFromNormalBuffer(currentCoord, normalData);

    // Generate the new sample (following values of the sequence)
    float2 theSample;
    theSample.x = GetBNDSequenceSample(currentCoord, _RaytracingFrameIndex, 0);
    theSample.y = GetBNDSequenceSample(currentCoord, _RaytracingFrameIndex, 1);

    // Importance sample with a cosine lobe
    float3 sampleDir = SampleHemisphereCosine(theSample.x, theSample.y, normalData.normalWS);

    // The cosine between the sampled direction and the normal is used as the PDF
    float samplePDF = dot(sampleDir, normalData.normalWS);

    // Write the output ray data (direction and inverse PDF)
    // NOTE(review): samplePDF is not guarded against zero, a direction perpendicular to the normal
    // would store an infinite inverse PDF - confirm how the consumer of this buffer handles it
    _RaytracingDirectionBuffer[COORD_TEXTURE2D_X(currentCoord)] = float4(sampleDir, 1.0 / samplePDF);
}
// Input textures for the spatial filtering
// NOTE(review): _BlueNoiseTexture is not referenced by the code visible in this file - confirm it is still needed
Texture2DArray<float> _BlueNoiseTexture;
// Output Textures for the spatial filtering (written by both upscale kernels, rgb = filtered lighting)
RW_TEXTURE2D_X(float4, _UpscaledIndirectDiffuseTextureRW);
// Cosine between two normals under which the weight of a neighbor is attenuated (see EvaluateNeighborWeight)
#define NORMAL_REJECTION_THRESHOLD 0.5
// Data of one neighbor tap used by the spatial filter of the upscale kernels
struct NeighborTapData
{
// Lighting value read from _IndirectDiffuseTexture
float3 lighting;
// Depth of the tap, converted with Linear01Depth
float linearDepth;
// World space normal of the tap
float3 normalWS;
};
// Evaluates how much a neighbor tap contributes to the pixel being reconstructed.
//  - a tap whose linear depth is exactly 1.0 (background) gets a weight of zero
//  - a tap whose normal diverges too much (cosine under NORMAL_REJECTION_THRESHOLD) is attenuated to 10%
//  - the weight decreases linearly with the depth difference and reaches zero when the difference
//    is 20% of the linear depth of the reconstructed pixel
float EvaluateNeighborWeight(in NeighborTapData neighborData, float3 normalWS, float linearDepth)
{
    // Background taps do not hold any usable lighting
    const bool isBackgroundTap = (neighborData.linearDepth == 1.0);
    float weight = isBackgroundTap ? 0.0f : 1.0f;

    // Attenuate the taps that are facing a different direction
    const float normalSimilarity = dot(neighborData.normalWS, normalWS);
    if (normalSimilarity < NORMAL_REJECTION_THRESHOLD)
        weight *= 0.1f;

    // Attenuate the taps based on their depth difference with the center pixel
    const float depthDelta = abs(neighborData.linearDepth - linearDepth);
    const float depthMismatch = saturate(depthDelta / (linearDepth * 0.2));
    weight *= lerp(1.0, 0.0, depthMismatch);

    return weight;
}
// Number of LDS entries filled by each loading thread of the half res upscale
#define HALF_RES_PER_THREAD_TAP_COUNT 2
// Size (in half res pixels) of the border added on each side of the tile
#define HALF_RES_OUT_REGION_SIZE 3
// Width of the cached region: border + the 4 half res pixels covered by the 8x8 full res tile + border
#define HALF_RES_REGION_SIZE (HALF_RES_OUT_REGION_SIZE + 4 + HALF_RES_OUT_REGION_SIZE)
// Total number of entries of the cached region
#define HALF_RES_REGION_SIZE_2 (HALF_RES_REGION_SIZE * HALF_RES_REGION_SIZE)
// LDS used to pre-fetch the neighborhood data (in Half Res) (10x10 region)
// Lighting, packed with PackToR11G11B10f
groupshared uint gs_cacheLighting_HR[HALF_RES_REGION_SIZE_2];
// Depth, converted with Linear01Depth
groupshared float gs_cacheDepth_HR[HALF_RES_REGION_SIZE_2];
// Two component encoded normal, stored as two half floats (x in the low 16 bits, y in the high 16 bits)
groupshared uint gs_cacheNormal_HR[HALF_RES_REGION_SIZE_2];
// Loads HALF_RES_PER_THREAD_TAP_COUNT taps of the half res neighborhood of a tile into the LDS.
//  groupIndex:  flattened index of the thread inside its group, the caller must only invoke this function
//               for groupIndex < HALF_RES_REGION_SIZE_2 / HALF_RES_PER_THREAD_TAP_COUNT
//  groupOrigin: full res coordinate of the first pixel of the tile
void FillUpscaleNeighborhoodDataLDS_Half(uint groupIndex, uint2 groupOrigin)
{
    // Half res coordinate of the first tap of the cached region (can be negative on the screen borders)
    int2 originXY = (int2)(groupOrigin / 2) - int2(HALF_RES_OUT_REGION_SIZE, HALF_RES_OUT_REGION_SIZE);

    // Last valid coordinates of the half res and full res textures
    const int2 maxHalfResCoord = int2((int)_ScreenSize.x / 2 - 1, (int)_ScreenSize.y / 2 - 1);
    const int2 maxFullResCoord = int2((int)_ScreenSize.x - 1, (int)_ScreenSize.y - 1);

    for (int i = 0; i < HALF_RES_PER_THREAD_TAP_COUNT; ++i)
    {
        uint sampleID = i + (groupIndex * HALF_RES_PER_THREAD_TAP_COUNT);
        int offsetX = sampleID % HALF_RES_REGION_SIZE;
        int offsetY = sampleID / HALF_RES_REGION_SIZE;

        // Compute the half res sampling coordinate
        int2 halfResCoord = int2(originXY.x + offsetX, originXY.y + offsetY);

        // Evaluate the LDS index for this sample
        int LDSIndex = offsetX + offsetY * HALF_RES_REGION_SIZE;

        // Clamp the tap to the half res texture. The clamp has to be done on signed values: casting a
        // negative coordinate to uint first makes it wrap to a huge value that ends up clamped to the
        // opposite border of the screen instead of zero.
        int2 sampleCoordSignal = clamp(halfResCoord, int2(0, 0), maxHalfResCoord);

        // Store the lighting into the LDS
        float3 lighting = LOAD_TEXTURE2D_X(_IndirectDiffuseTexture, sampleCoordSignal).xyz;
        gs_cacheLighting_HR[LDSIndex] = PackToR11G11B10f(lighting);

        // Depth and normal are fetched from the full res pixel matching the (clamped) half res tap,
        // so that the lighting and the geometric data of a tap always describe the same sample
        int2 sampleCoord = ComputeSourceCoordinates(sampleCoordSignal, _RayTracingCheckerIndex);
        sampleCoord = clamp(sampleCoord, int2(0, 0), maxFullResCoord);

        // Store the depth and normal into the LDS
        float depthValue = LOAD_TEXTURE2D_X(_DepthTexture, sampleCoord).x;
        gs_cacheDepth_HR[LDSIndex] = Linear01Depth(depthValue, _ZBufferParams);
        float2 octNormalWS = Unpack888ToFloat2(LOAD_TEXTURE2D_X(_NormalBufferTexture, sampleCoord).xyz);
        gs_cacheNormal_HR[LDSIndex] = f32tof16(octNormalWS.x) | f32tof16(octNormalWS.y) << 16;
    }
}
// Reads and unpacks the tap stored at the given index of the half res LDS cache
NeighborTapData GetNeighborTapDataSample_HR(uint index)
{
    // The normal is stored as two half floats (x in the low 16 bits, y in the high 16 bits)
    const uint normalBits = gs_cacheNormal_HR[index];
    const float2 octNormal = float2(f16tof32(normalBits), f16tof32(normalBits >> 16));

    NeighborTapData tap;
    tap.lighting = UnpackFromR11G11B10f(gs_cacheLighting_HR[index]);
    tap.linearDepth = gs_cacheDepth_HR[index];
    // Remap from [0, 1] to [-1, 1] before decoding the world space normal
    tap.normalWS = UnpackNormalOctQuadEncode(octNormal * 2.0 - 1.0);
    return tap;
}
// Converts a thread position inside the tile and a half res tap offset into an index of the half res LDS cache
uint OffsetToLDSAdress_HR(uint2 groupThreadId, int2 offset)
{
    // Position of the thread's own half res pixel in the 10x10 grid (the tile starts after the border)
    const int2 centerTap = (int2)(groupThreadId / 2 + HALF_RES_OUT_REGION_SIZE);

    // Position of the requested tap in the grid
    const uint2 tap = (uint2)(centerTap + offset);

    // Flatten the address and keep it inside the cache
    return clamp((uint)(tap.x) + tap.y * HALF_RES_REGION_SIZE, 0, HALF_RES_REGION_SIZE_2 - 1);
}
// Fetches from the half res LDS cache the tap located at the given offset from the thread's pixel
NeighborTapData GetNeighborTapDataSample_HR(uint2 groupThreadId, int2 offset)
{
    const uint ldsAddress = OffsetToLDSAdress_HR(groupThreadId, offset);
    return GetNeighborTapDataSample_HR(ldsAddress);
}
// Variant of GetNeighborTapDataSample_HR that reads the tap directly from the textures (no LDS cache).
//  fulLResCoord: full res coordinate of the pixel being reconstructed
//  offset:       offset of the tap, expressed in half res pixels
NeighborTapData GetNeighborTapDataSample_HR_NOLDS(uint2 fulLResCoord, int2 offset)
{
    // Move to half res, apply the offset there, then come back to full res
    int2 fullResTap = (fulLResCoord / 2 + offset) * 2;

    // Keep the tap inside the screen
    fullResTap = int2(clamp(fullResTap.x, 0, (int)_ScreenSize.x - 1), clamp(fullResTap.y, 0, (int)_ScreenSize.y - 1));

    // Depth and normal are read in full res
    const float rawDepth = LOAD_TEXTURE2D_X(_DepthTexture, fullResTap).x;
    float4 encodedNormal = LOAD_TEXTURE2D_X(_NormalBufferTexture, fullResTap);
    NormalData tapNormalData;
    DecodeFromNormalBuffer(encodedNormal, tapNormalData);

    NeighborTapData tap;
    // The lighting is read at the matching half res coordinate
    tap.lighting = LOAD_TEXTURE2D_X(_IndirectDiffuseTexture, fullResTap / 2).xyz;
    tap.linearDepth = Linear01Depth(rawDepth, _ZBufferParams);
    tap.normalWS = tapNormalData.normalWS;
    return tap;
}
// Upscales the half res indirect diffuse signal to full res.
// Every thread processes one full res pixel: it accumulates the half res taps of its neighborhood,
// weighted by EvaluateNeighborWeight, and writes the normalized result in _UpscaledIndirectDiffuseTextureRW
// (alpha is 1.0 when at least one tap contributed, the whole pixel is 0.0 otherwise).
// Background pixels are left untouched.
[numthreads(RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void IndirectDiffuseIntegrationUpscaleHalfRes(uint3 dispatchThreadId : SV_DispatchThreadID,
                                                int groupIndex : SV_GroupIndex,
                                                uint2 groupThreadId : SV_GroupThreadID,
                                                uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

#ifndef WITHOUT_LDS
    // Each loading thread fills HALF_RES_PER_THREAD_TAP_COUNT entries of the cache, so only the first
    // HALF_RES_REGION_SIZE_2 / HALF_RES_PER_THREAD_TAP_COUNT threads of the group have something to load
    if (groupIndex < (HALF_RES_REGION_SIZE_2 / HALF_RES_PER_THREAD_TAP_COUNT))
        FillUpscaleNeighborhoodDataLDS_Half(groupIndex, groupId * RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE);

    // Make sure all values are loaded in LDS by now.
    // This barrier must be reached by every thread, so no early-out is allowed before this point.
    GroupMemoryBarrierWithGroupSync();
#endif

    // Full res coordinate of the pixel reconstructed by this thread
    uint2 targetCoord = dispatchThreadId.xy;

    // Fetch the depth
    float depth = LOAD_TEXTURE2D_X(_DepthTexture, targetCoord).x;

    // If this is a background pixel, we are done
    if (depth == UNITY_RAW_FAR_CLIP_VALUE)
        return;

    float linearDepth = Linear01Depth(depth, _ZBufferParams);

    // Fetch the current normal data
    NormalData normalData;
    DecodeFromNormalBuffer(targetCoord, normalData);

    // Initialize the accumulators
    float3 lightingSum = float3(0.0, 0.0, 0.0);
    float weightSum = 0;

    // Loop through the half res neighborhood of the pixel
    for (int y = -HALF_RES_OUT_REGION_SIZE; y < HALF_RES_OUT_REGION_SIZE; ++y)
    {
        for (int x = -HALF_RES_OUT_REGION_SIZE; x < HALF_RES_OUT_REGION_SIZE; ++x)
        {
#ifndef WITHOUT_LDS
            // Grab the neighbor data from the LDS cache
            NeighborTapData neighborData = GetNeighborTapDataSample_HR(groupThreadId, int2(x, y));
#else
            // Grab the neighbor data from the textures
            NeighborTapData neighborData = GetNeighborTapDataSample_HR_NOLDS(targetCoord, int2(x, y));
#endif
            // Evaluate the weight of this neighbor
            float weight = EvaluateNeighborWeight(neighborData, normalData.normalWS, linearDepth);

            // Contribute to the accumulators
            lightingSum += neighborData.lighting * weight;
            weightSum += weight;
        }
    }

    // Normalize and output the result (no valid neighbor means no lighting)
    if (weightSum == 0.0f)
    {
        _UpscaledIndirectDiffuseTextureRW[COORD_TEXTURE2D_X(targetCoord)] = float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
    else
    {
        _UpscaledIndirectDiffuseTextureRW[COORD_TEXTURE2D_X(targetCoord)] = float4(lightingSum / weightSum, 1.0);
    }
}
// Number of LDS entries filled by each thread of the full res upscale
#define FULL_RES_PER_THREAD_TAP_COUNT 4
// Size (in pixels) of the border added on each side of the tile
#define FULL_RES_OUT_REGION_SIZE 4
// Width of the cached region: border + the 8 pixels of the tile + border
#define FULL_RES_REGION_SIZE (FULL_RES_OUT_REGION_SIZE + 8 + FULL_RES_OUT_REGION_SIZE)
// Total number of entries of the cached region
#define FULL_RES_REGION_SIZE_2 (FULL_RES_REGION_SIZE * FULL_RES_REGION_SIZE)
// LDS used to pre-fetch the neighborhood data (in Full Res) (16x16 region)
// Lighting, packed with PackToR11G11B10f
groupshared uint gs_cacheLighting_FR[FULL_RES_REGION_SIZE_2];
// Depth, converted with Linear01Depth
groupshared float gs_cacheDepth_FR[FULL_RES_REGION_SIZE_2];
// Two component encoded normal, stored as two half floats (x in the low 16 bits, y in the high 16 bits)
groupshared uint gs_cacheNormal_FR[FULL_RES_REGION_SIZE_2];
// Loads FULL_RES_PER_THREAD_TAP_COUNT taps of the full res neighborhood of a tile into the LDS.
//  groupIndex:  flattened index of the thread inside its group
//  groupOrigin: coordinate of the first pixel of the tile
void FillUpscaleNeighborhoodDataLDS_Full(uint groupIndex, uint2 groupOrigin)
{
    // Coordinate of the first tap of the cached region (can be negative on the screen borders)
    int2 regionCorner = groupOrigin - int2(FULL_RES_OUT_REGION_SIZE, FULL_RES_OUT_REGION_SIZE);

    for (int tapIdx = 0; tapIdx < FULL_RES_PER_THREAD_TAP_COUNT; ++tapIdx)
    {
        // Taps are distributed linearly over the threads, row by row
        uint sampleID = tapIdx + groupIndex * FULL_RES_PER_THREAD_TAP_COUNT;
        int column = sampleID % FULL_RES_REGION_SIZE;
        int row = sampleID / FULL_RES_REGION_SIZE;

        // Screen coordinate of the tap, kept inside the screen
        int2 tapCoord = int2(regionCorner.x + column, regionCorner.y + row);
        int clampedX = clamp(tapCoord.x, 0, _ScreenSize.x - 1);
        int clampedY = clamp(tapCoord.y, 0, _ScreenSize.y - 1);
        int2 sampleCoord = int2(clampedX, clampedY);

        // Where the tap lands in the cache
        int LDSIndex = column + row * FULL_RES_REGION_SIZE;

        // Lighting, packed on 32 bits
        float3 tapLighting = LOAD_TEXTURE2D_X(_IndirectDiffuseTexture, sampleCoord).xyz;
        gs_cacheLighting_FR[LDSIndex] = PackToR11G11B10f(tapLighting);

        // Linear depth
        float rawDepth = LOAD_TEXTURE2D_X(_DepthTexture, sampleCoord).x;
        gs_cacheDepth_FR[LDSIndex] = Linear01Depth(rawDepth, _ZBufferParams);

        // Encoded normal, stored as two half floats
        float2 encodedNormal = Unpack888ToFloat2(LOAD_TEXTURE2D_X(_NormalBufferTexture, sampleCoord).xyz);
        gs_cacheNormal_FR[LDSIndex] = f32tof16(encodedNormal.x) | f32tof16(encodedNormal.y) << 16;
    }
}
// Reads and unpacks the tap stored at the given index of the full res LDS cache
NeighborTapData GetNeighborTapDataSample_FR(uint index)
{
    // The normal is stored as two half floats (x in the low 16 bits, y in the high 16 bits)
    const uint normalBits = gs_cacheNormal_FR[index];
    const float2 octNormal = float2(f16tof32(normalBits), f16tof32(normalBits >> 16));

    NeighborTapData tap;
    tap.lighting = UnpackFromR11G11B10f(gs_cacheLighting_FR[index]);
    tap.linearDepth = gs_cacheDepth_FR[index];
    // Remap from [0, 1] to [-1, 1] before decoding the world space normal
    tap.normalWS = UnpackNormalOctQuadEncode(octNormal * 2.0 - 1.0);
    return tap;
}
// Converts a thread position inside the tile and a tap offset into an index of the full res LDS cache
uint OffsetToLDSAdress_FR(uint2 groupThreadId, int2 offset)
{
    // Position of the thread's own pixel in the 16x16 grid (the tile starts after the border)
    const int2 centerTap = (int2)(groupThreadId + FULL_RES_OUT_REGION_SIZE);

    // Position of the requested tap in the grid
    const uint2 tap = (uint2)(centerTap + offset);

    // Flatten the address and keep it inside the cache
    return clamp((uint)(tap.x) + tap.y * FULL_RES_REGION_SIZE, 0, FULL_RES_REGION_SIZE_2 - 1);
}
// Fetches from the full res LDS cache the tap located at the given offset from the thread's pixel
NeighborTapData GetNeighborTapDataSample_FR(uint2 groupThreadId, int2 offset)
{
    const uint ldsAddress = OffsetToLDSAdress_FR(groupThreadId, offset);
    return GetNeighborTapDataSample_FR(ldsAddress);
}
// Variant of GetNeighborTapDataSample_FR that reads the tap directly from the textures (no LDS cache).
//  fulLResCoord: coordinate of the pixel being reconstructed
//  offset:       offset of the tap, in pixels
NeighborTapData GetNeighborTapDataSample_FR_NOLDS(uint2 fulLResCoord, int2 offset)
{
    int2 tapCoord = fulLResCoord + offset;

    // Keep the tap inside the screen, the same way the LDS path (FillUpscaleNeighborhoodDataLDS_Full)
    // and the half res variant of this function do, instead of reading outside of the textures
    tapCoord = int2(clamp(tapCoord.x, 0, (int)_ScreenSize.x - 1), clamp(tapCoord.y, 0, (int)_ScreenSize.y - 1));

    NeighborTapData outVal;
    outVal.lighting = LOAD_TEXTURE2D_X(_IndirectDiffuseTexture, tapCoord).xyz;
    outVal.linearDepth = Linear01Depth(LOAD_TEXTURE2D_X(_DepthTexture, tapCoord).x, _ZBufferParams);

    // Decode the world space normal of the tap
    float4 normalBuffer = LOAD_TEXTURE2D_X(_NormalBufferTexture, tapCoord);
    NormalData normalData;
    DecodeFromNormalBuffer(normalBuffer, normalData);
    outVal.normalWS = normalData.normalWS;
    return outVal;
}
// Spatially filters the full res indirect diffuse signal.
// Every thread processes one pixel: it accumulates the taps of its neighborhood, weighted by
// EvaluateNeighborWeight, and writes the normalized result in _UpscaledIndirectDiffuseTextureRW
// (alpha is 1.0 when at least one tap contributed, the whole pixel is 0.0 otherwise).
// Background pixels are left untouched.
[numthreads(RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void IndirectDiffuseIntegrationUpscaleFullRes(uint3 dispatchThreadId : SV_DispatchThreadID,
                                                int groupIndex : SV_GroupIndex,
                                                uint2 groupThreadId : SV_GroupThreadID,
                                                uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

#ifndef WITHOUT_LDS
    // Every thread of the group loads FULL_RES_PER_THREAD_TAP_COUNT values
    FillUpscaleNeighborhoodDataLDS_Full(groupIndex, groupId * RAYTRACING_INDIRECT_DIFFUSE_TILE_SIZE);

    // Make sure all values are loaded in LDS by now.
    // This barrier must be reached by every thread, so no early-out is allowed before this point.
    GroupMemoryBarrierWithGroupSync();
#endif

    // Coordinate of the pixel processed by this thread
    uint2 targetCoord = dispatchThreadId.xy;

    // Fetch the depth
    float depth = LOAD_TEXTURE2D_X(_DepthTexture, targetCoord).x;

    // If this is a background pixel, we are done
    if (depth == UNITY_RAW_FAR_CLIP_VALUE)
        return;

    float linearDepth = Linear01Depth(depth, _ZBufferParams);

    // Fetch the current normal data
    NormalData normalData;
    DecodeFromNormalBuffer(targetCoord, normalData);

    // Initialize the accumulators
    float3 lightingSum = 0.0;
    float weightSum = 0;

    // Loop through the neighborhood of the pixel
    for (int y = -FULL_RES_OUT_REGION_SIZE; y < FULL_RES_OUT_REGION_SIZE; ++y)
    {
        for (int x = -FULL_RES_OUT_REGION_SIZE; x < FULL_RES_OUT_REGION_SIZE; ++x)
        {
#ifndef WITHOUT_LDS
            // Grab the neighbor data from the LDS cache
            NeighborTapData neighborData = GetNeighborTapDataSample_FR(groupThreadId, int2(x, y));
#else
            // Grab the neighbor data from the textures
            NeighborTapData neighborData = GetNeighborTapDataSample_FR_NOLDS(targetCoord, int2(x, y));
#endif
            // Evaluate the weight of this neighbor
            float weight = EvaluateNeighborWeight(neighborData, normalData.normalWS, linearDepth);

            // Contribute to the accumulators
            lightingSum += neighborData.lighting * weight;
            weightSum += weight;
        }
    }

    // Normalize and output the result. The division is guarded the same way as in the half res kernel:
    // when every tap has been rejected, dividing by a null weight sum would write NaNs in the output.
    if (weightSum == 0.0f)
    {
        _UpscaledIndirectDiffuseTextureRW[COORD_TEXTURE2D_X(targetCoord)] = float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
    else
    {
        _UpscaledIndirectDiffuseTextureRW[COORD_TEXTURE2D_X(targetCoord)] = float4(lightingSum / weightSum, 1.0);
    }
}