#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

// Depth related kernels
#pragma kernel DownscaleDepth

// Trace to intermediate
#pragma kernel ReprojectClouds REPROJECT_CLOUDS=ReprojectClouds
#pragma kernel ReprojectCloudsRejection REPROJECT_CLOUDS=ReprojectCloudsRejection WITH_REJECTION
#pragma kernel PreUpscaleClouds

// Intermediate to Full resolution
#pragma kernel UpscaleClouds UPSCALE_CLOUDS=UpscaleClouds
#pragma kernel UpscaleCloudsPerceptual UPSCALE_CLOUDS=UpscaleCloudsPerceptual PERCEPTUAL_TRANSMITTANCE

// Full resolution combination
#pragma kernel CombineClouds COMBINE_CLOUDS=CombineClouds
#pragma kernel CombineCloudsPerceptual COMBINE_CLOUDS=CombineCloudsPerceptual PERCEPTUAL_TRANSMITTANCE

// #define WITHOUT_LDS
// #pragma enable_d3d11_debug_symbols

// HDRP generic includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/BilateralUpsample.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricClouds/VolumetricCloudsUtilities.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/VolumetricClouds/VolumetricCloudsDenoising.hlsl"

// Input textures
TEXTURE2D_X(_CameraColorTexture);
TEXTURE2D_X(_DepthTexture);

// History buffers
TEXTURE2D_X(_HistoryVolumetricClouds0Texture);
TEXTURE2D_X(_HistoryVolumetricClouds1Texture);

// Output textures
RW_TEXTURE2D_X(float, _HalfResDepthBufferRW);
RW_TEXTURE2D_X(float4, _CloudsLightingTextureRW);
RW_TEXTURE2D_X(float3, _CloudsAdditionalTextureRW);

// Builds the intermediate-resolution depth buffer: every output pixel stores the minimum raw
// depth of the 2x2 block of _DepthTexture it covers.
[numthreads(8, 8, 1)]
void DownscaleDepth(uint3 intermediateCoord : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(intermediateCoord.z);

    // If this is outside of the intermediate buffer, we are done
    if (any(intermediateCoord.xy >= uint2(_IntermediateScreenSize.xy)))
        return;

    // Top-left corner of the 2x2 source block. Kept in integer arithmetic so the texture loads
    // do not go through an int -> float -> int round-trip.
    uint2 sourceOrigin = intermediateCoord.xy * 2;

    // TODO USE LDS for this
    float depth0 = LOAD_TEXTURE2D_X(_DepthTexture, sourceOrigin).x;
    float depth1 = LOAD_TEXTURE2D_X(_DepthTexture, sourceOrigin + uint2(0, 1)).x;
    float depth2 = LOAD_TEXTURE2D_X(_DepthTexture, sourceOrigin + uint2(1, 1)).x;
    float depth3 = LOAD_TEXTURE2D_X(_DepthTexture, sourceOrigin + uint2(1, 0)).x;

    // Keep the minimum raw depth of the four source pixels
    _HalfResDepthBufferRW[COORD_TEXTURE2D_X(intermediateCoord.xy)] = min(min(depth0, depth1), min(depth2, depth3));
}

// Temporal reprojection from the trace buffer to the intermediate buffer.
// For every intermediate pixel the kernel fetches the history (lighting + sample count / depth /
// cloud depth), validates it, and then either:
//  - blends it with the freshly traced value when this pixel is the one traced this frame
//    (checkerboard index match), or
//  - keeps the (slightly devalued) history, falling back to a 3x3 bilateral upsample of the
//    current trace when the history is not valid.
// With WITH_REJECTION, the history of pixels at the far clip depth is additionally clipped to
// the min/max of the 3x3 traced neighborhood.
[numthreads(8, 8, 1)]
void REPROJECT_CLOUDS(uint3 dispatchThreadId : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Compute the set of coordinates we need
    uint2 intermediateCoord = dispatchThreadId.xy;
    uint2 fullResCoord = intermediateCoord * _IntermediateResolutionScale;
    uint2 traceCoord = intermediateCoord / 2;

#ifndef WITHOUT_LDS
    // Only 36 workers of the 64 do the pre-fetching
    if (groupIndex < 36)
    {
        // Load 1 value per thread
        FillCloudReprojectionLDS(groupIndex, groupId * 8);
    }

    // Make sure all values are loaded in LDS by now.
    GroupMemoryBarrierWithGroupSync();
#endif

    // Cloud depth of the trace pixel that covers this intermediate pixel
#ifdef WITHOUT_LDS
    float currentCloudDepth = LOAD_TEXTURE2D_X(_CloudsDepthTexture, traceCoord).x;
#else
    float currentCloudDepth = GetCloudDepth_LDS(groupThreadId, int2(0, 0));
#endif

    // Compute the motionVector of the clouds
    float2 motionVector = EvaluateCloudMotionVectors(fullResCoord, currentCloudDepth, 1.0);

    // Compute the history pixel coordinate to tap from
    float2 historyCoord = (intermediateCoord.xy + 0.5) - motionVector * _IntermediateScreenSize.xy;
    float2 clampedHistoryUV = clamp(historyCoord, 0.0, _IntermediateScreenSize.xy - 0.5f) / _IntermediateScreenSize.xy;

    // Rescale the UV to account for the history buffer being potentially bigger than its viewport
    float2 ratioScale = _HistoryViewportSize / _HistoryBufferSize;
    float2 historySampleCoords = clampedHistoryUV * ratioScale;

    // Grab the history values
    float4 previousResult = SAMPLE_TEXTURE2D_X_LOD(_HistoryVolumetricClouds0Texture, s_linear_clamp_sampler, historySampleCoords, 0);
    float3 previousResult1 = SAMPLE_TEXTURE2D_X_LOD(_HistoryVolumetricClouds1Texture, s_linear_clamp_sampler, historySampleCoords, 0).xyz;

    // Inverse the exposure of the previous frame and apply the current one (need to be done in linear space)
    previousResult.xyz *= GetInversePreviousExposureMultiplier() * GetCurrentExposureMultiplier();

    // Unpack the second buffer
    float previousSampleCount = previousResult1.x;
    float previousDepth = previousResult1.y;
    float previousCloudDepth = previousResult1.z;

    // Reproject previous cloud depth in case near plane has changed
    previousCloudDepth = saturate(previousCloudDepth * _NearPlaneReprojection);

    // This tracks if the history is considered valid
    bool validHistory = previousSampleCount >= 0.5f;

    // The history is invalid if we are requesting a value outside the frame
    if (historyCoord.x < 0.0 || historyCoord.x >= _IntermediateScreenSize.x || historyCoord.y < 0.0 || historyCoord.y >= _IntermediateScreenSize.y)
        validHistory = false;

    // Read the depth of the current pixel
    float currentDepth = LOAD_TEXTURE2D_X(_HalfResDepthBuffer, intermediateCoord).x;

    // Compare the depth of the current pixel to the one of its history, if they are too different
    // (more than 20% of the current linear depth), we cannot consider this history valid
    float linearPrevDepth = Linear01Depth(previousDepth, _ZBufferParams);
    float linearCurrentDepth = Linear01Depth(currentDepth, _ZBufferParams);
    if (abs(linearPrevDepth - linearCurrentDepth) > linearCurrentDepth * 0.2)
        validHistory = false;

    float validityFactor = 1.0;

#ifdef WITH_REJECTION
    // Compute the min/max of the lighting within the 3x3 trace region
    float4 lightingMin = float4(FLT_MAX, FLT_MAX, FLT_MAX, 1.0);
    float4 lightingMax = float4(0, 0, 0, 0.0);
    for (int y = -1; y <= 1; ++y)
    {
        for (int x = -1; x <= 1; ++x)
        {
        #ifdef WITHOUT_LDS
            float4 cloudLighting = LOAD_TEXTURE2D_X(_CloudsLightingTexture, traceCoord + int2(x, y));
        #else
            float4 cloudLighting = GetCloudLighting_LDS(groupThreadId, int2(x, y));
        #endif
            lightingMin = min(lightingMin, cloudLighting);
            lightingMax = max(lightingMax, cloudLighting);
        }
    }

    // Only pixels at the far clip depth get their history clipped to the neighborhood range
    if (currentDepth == UNITY_RAW_FAR_CLIP_VALUE)
        previousResult = ClipCloudsToRegion(previousResult, lightingMin, lightingMax, validityFactor);
#endif

    // Compute the local index that tells us the index of this pixel, the strategy for reprojection is a bit different in both cases
    int localIndex = (intermediateCoord.x & 1) + (intermediateCoord.y & 1) * 2;
    int currentIndex = ComputeCheckerBoardIndex(traceCoord, _SubPixelIndex);
    if (localIndex == currentIndex)
    {
        // Count the pixels of the 3x3 trace region that have a non-zero cloud depth
        float cloudNeighborhood = 0.0f;
        for (int y = -1; y <= 1; ++y)
        {
            for (int x = -1; x <= 1; ++x)
            {
            #ifdef WITHOUT_LDS
                if (LOAD_TEXTURE2D_X(_CloudsDepthTexture, traceCoord + int2(x, y)).x != 0.0f)
                    cloudNeighborhood += 1.0f;
            #else
                if (GetCloudDepth_LDS(groupThreadId, int2(x, y)) != 0.0f)
                    cloudNeighborhood += 1.0f;
            #endif
            }
        }

        // Without a valid history (or with an empty neighborhood), only the new sample is used
        float accumulationFactor = 0.0;
        float sampleCount = 1.0;
        if (validHistory && cloudNeighborhood != 0.0f)
        {
            // Define our accumulation value: n / (n + 1), capped at 16 samples (0.94117647058 == 16 / 17)
            accumulationFactor = previousSampleCount >= 16.0 ? 0.94117647058 : (previousSampleCount / (previousSampleCount + 1.0));
            accumulationFactor *= _TemporalAccumulationFactor * validityFactor * _CloudHistoryInvalidation;
            sampleCount = min(previousSampleCount + 1.0, 16.0);
        }

        // Accumulate the result with the previous frame
    #ifdef WITHOUT_LDS
        previousResult = accumulationFactor * previousResult + (1.0 - accumulationFactor) * LOAD_TEXTURE2D_X(_CloudsLightingTexture, traceCoord);
    #else
        previousResult = accumulationFactor * previousResult + (1.0 - accumulationFactor) * GetCloudLighting_LDS(groupThreadId, int2(0, 0));
    #endif
        previousSampleCount = sampleCount;
        previousDepth = currentDepth;

        // If the accumulated pixel is fully transparent (no clouds), the cloud depth is forced to the
        // far clip value. Otherwise the cloud depth traced this frame is used, as depths cannot be
        // interpolated between frames.
        previousCloudDepth = previousResult.w == 1.0 ? UNITY_RAW_FAR_CLIP_VALUE : currentCloudDepth;
    }
    else
    {
        // Reduce the history validity a bit
        previousSampleCount *= validityFactor * _CloudHistoryInvalidation;

        // If the target coordinate is out of the screen or the depth that was used to generate it
        // is too different from the one of the current pixel, we cannot use the history
        if (!validHistory)
        {
            // Structure that will hold everything
            NeighborhoodUpsampleData3x3 upsampleData;
        #ifdef WITHOUT_LDS
            FillCloudReprojectionNeighborhoodData_NOLDS(traceCoord, localIndex, upsampleData);
        #else
            FillCloudReprojectionNeighborhoodData(groupThreadId, localIndex, upsampleData);
        #endif

            // Make sure that at least one of the pixels in the neighborhood can be used
            bool rejectNeighborhood;
            int closestNeighbor = 4; // presumably the center of the 3x3 neighborhood - TODO confirm
            OverrideMaskValues(currentDepth, upsampleData, rejectNeighborhood, closestNeighbor);

            if (rejectNeighborhood)
            {
                // We don't have any valid history and there is no neighbor that is usable, we consider that we have no clouds.
                previousResult = float4(0.0, 0.0, 0.0, 1.0);
                previousSampleCount = 0.0f;
            }
            else
            {
                // We don't have any history for this pixel, but there is at least one neighbor that can be used in the current frame tracing
                previousSampleCount = 1.0f;
                previousResult = BilUpColor3x3(currentDepth, upsampleData);

                // Due to numerical precision issues, upscaling a bunch of 1.0 can lead to a slightly lower number, this fixes it.
                if (EvaluateRegionEmptiness(upsampleData) == 1.0)
                    previousResult = float4(0, 0, 0, 1);
            }

            // The cloud depth is taken from the closest neighbor of the trace neighborhood
        #ifdef WITHOUT_LDS
            previousCloudDepth = LOAD_TEXTURE2D_X(_CloudsDepthTexture, traceCoord + IndexToLocalOffsetCoords[closestNeighbor]).x;
        #else
            previousCloudDepth = GetCloudDepth_LDS(groupThreadId, IndexToLocalOffsetCoords[closestNeighbor]);
        #endif
        }
        previousDepth = currentDepth;
    }

    // Make sure the transmittance doesn't go outside of the [0, 1] interval
    previousResult.w = saturate(previousResult.w);

    // Write the reprojected lighting and its status (sample count, pixel depth, cloud depth)
    _CloudsLightingTextureRW[COORD_TEXTURE2D_X(intermediateCoord)] = previousResult;
    _CloudsAdditionalTextureRW[COORD_TEXTURE2D_X(intermediateCoord)] = float3(previousSampleCount, previousDepth, previousCloudDepth);
}

// History-free variant of the trace -> intermediate pass: the pixel traced this frame is copied
// as is, every other pixel is reconstructed with a 3x3 bilateral upsample of the trace buffer.
[numthreads(8, 8, 1)]
void PreUpscaleClouds(uint3 dispatchThreadId : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Compute the set of coordinates we need
    uint2 intermediateCoord = dispatchThreadId.xy;
    uint2 traceCoord = intermediateCoord / 2;

#ifndef WITHOUT_LDS
    // Only 36 workers of the 64 do the pre-fetching
    if (groupIndex < 36)
    {
        // Load 1 value per thread
        FillCloudReprojectionLDS(groupIndex, groupId * 8);
    }

    // Make sure all values are loaded in LDS by now.
    GroupMemoryBarrierWithGroupSync();
#endif

    // Cloud depth of the trace pixel that covers this intermediate pixel
#ifdef WITHOUT_LDS
    float currentCloudDepth = LOAD_TEXTURE2D_X(_CloudsDepthTexture, traceCoord).x;
#else
    float currentCloudDepth = GetCloudDepth_LDS(groupThreadId, int2(0, 0));
#endif

    float cloudCloudDepth = 0;
    float currentDepth = LOAD_TEXTURE2D_X(_HalfResDepthBuffer, intermediateCoord).x;
    float4 cloudLighting = 0;

    // Compute the local index that tells us the index of this pixel, the strategy is a bit different in both cases
    int localIndex = (intermediateCoord.x & 1) + (intermediateCoord.y & 1) * 2;
    int currentIndex = _EnableIntegration ? ComputeCheckerBoardIndex(traceCoord, _SubPixelIndex) : 0;
    if (localIndex == currentIndex)
    {
        // This pixel was traced this frame, use its values directly
    #ifdef WITHOUT_LDS
        cloudLighting = LOAD_TEXTURE2D_X(_CloudsLightingTexture, traceCoord);
    #else
        cloudLighting = GetCloudLighting_LDS(groupThreadId, int2(0, 0));
    #endif
        cloudCloudDepth = currentCloudDepth;
    }
    else
    {
        // Structure that will hold everything
        NeighborhoodUpsampleData3x3 upsampleData;
    #ifdef WITHOUT_LDS
        FillCloudReprojectionNeighborhoodData_NOLDS(traceCoord, localIndex, upsampleData);
    #else
        FillCloudReprojectionNeighborhoodData(groupThreadId, localIndex, upsampleData);
    #endif

        // Make sure that at least one of the pixels in the neighborhood can be used.
        // Same declarations as in the reprojection kernel (bool flag, neighbor defaulting to 4).
        bool rejectNeighborhood;
        int closestNeighbor = 4;
        OverrideMaskValues(currentDepth, upsampleData, rejectNeighborhood, closestNeighbor);

        if (rejectNeighborhood)
        {
            // There is no neighbor that is usable.
            // NOTE(review): this writes a transmittance of 0 whereas the reprojection and upscale
            // kernels fall back to float4(0, 0, 0, 1) (no clouds) - confirm this is intended.
            cloudLighting = 0.0f;
        }
        else
        {
            // There is at least one neighbor that can be used in the current frame tracing
            cloudLighting = BilUpColor3x3(currentDepth, upsampleData);
        }

        // The cloud depth is taken from the closest neighbor of the trace neighborhood
    #ifdef WITHOUT_LDS
        cloudCloudDepth = LOAD_TEXTURE2D_X(_CloudsDepthTexture, traceCoord + IndexToLocalOffsetCoords[closestNeighbor]).x;
    #else
        cloudCloudDepth = GetCloudDepth_LDS(groupThreadId, IndexToLocalOffsetCoords[closestNeighbor]);
    #endif

        // Due to numerical precision issues, upscaling a bunch of 1.0 can lead to a slightly lower number, this fixes it.
        if (EvaluateRegionEmptiness(upsampleData) == 1.0)
            cloudLighting = float4(0, 0, 0, 1);
    }

    // Make sure the transmittance doesn't go outside of the [0, 1] interval
    cloudLighting.w = saturate(cloudLighting.w);

    // Write the lighting and its status (a sample count of 1, pixel depth, cloud depth)
    _CloudsLightingTextureRW[COORD_TEXTURE2D_X(intermediateCoord)] = cloudLighting;
    _CloudsAdditionalTextureRW[COORD_TEXTURE2D_X(intermediateCoord)] = float3(1, currentDepth, cloudCloudDepth);
}

// Constant buffer where all variables should land
CBUFFER_START(VolumetricCloudsUpscaleConstantBuffer)
    float2 _UpperScreenSize;
CBUFFER_END

RW_TEXTURE2D_X(float4, _VolumetricCloudsLightingTextureRW);
RW_TEXTURE2D_X(float, _VolumetricCloudsDepthTextureRW);

// Loads one texel of the 6x6 intermediate-resolution tile that surrounds the thread group
// (its half resolution footprint plus a one pixel border) into the group shared caches.
//  groupIndex:  index of the calling worker, expected in [0, 36).
//  groupOrigin: full resolution coordinate of the first pixel of the thread group.
void FillLDSUpscale(uint groupIndex, uint2 groupOrigin)
{
    // Define which value we will be accessing with this worker thread
    int accessCoordX = groupIndex % 6;
    int accessCoordY = groupIndex / 6;

    // Everything we are accessing is in intermediate res (half res).
    uint2 traceGroupOrigin = groupOrigin / 2;

    // The initial position of the access (one pixel border around the group footprint)
    int2 originXY = traceGroupOrigin - int2(1, 1);

    // Compute the sample position, clamped to the intermediate buffer
    int2 sampleCoord = int2(clamp(originXY.x + accessCoordX, 0, _IntermediateScreenSize.x - 1), clamp(originXY.y + accessCoordY, 0, _IntermediateScreenSize.y - 1));

    // Read the sample value
    float4 sampleVal = LOAD_TEXTURE2D_X(_VolumetricCloudsTexture, sampleCoord);
    float3 depthStatusValue = LOAD_TEXTURE2D_X(_DepthStatusTexture, sampleCoord).xyz;

    // Store into the LDS
    gs_cacheR[groupIndex] = sampleVal.r;
    gs_cacheG[groupIndex] = sampleVal.g;
    gs_cacheB[groupIndex] = sampleVal.b;
    gs_cacheA[groupIndex] = sampleVal.a;
    gs_cacheDP[groupIndex] = depthStatusValue.y;
    gs_cachePS[groupIndex] = saturate(depthStatusValue.x);
    gs_cacheDC[groupIndex] = depthStatusValue.z;
}

// Bilateral upscale of the intermediate clouds to the final resolution. Outputs the upscaled
// lighting with the final transmittance in alpha, and the upscaled cloud depth.
[numthreads(8, 8, 1)]
void UPSCALE_CLOUDS(uint3 finalCoord : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(finalCoord.z);

    int2 halfResCoord = finalCoord.xy / 2;

#ifndef WITHOUT_LDS
    // Only 36 workers of the 64 do the pre-fetching
    if (groupIndex < 36)
    {
        // Load 1 value per thread
        FillLDSUpscale(groupIndex, groupId * 8);
    }

    // Make sure all values are loaded in LDS by now.
    GroupMemoryBarrierWithGroupSync();
#endif

    // If out of bounds, leave right away (must stay after the barrier so every thread reaches it)
    if (any(finalCoord.xy >= uint2(_FinalScreenSize.xy)))
        return;

    // Grab the depth value of the pixel
    float highDepth = LOAD_TEXTURE2D_X(_DepthTexture, finalCoord.xy).x;

    // Compute the index of the pixel in the 2x2 region (L->R, T->B)
    uint subRegionIndex = (finalCoord.x & 1) + (finalCoord.y & 1) * 2;

    // Structure that will hold everything
    NeighborhoodUpsampleData3x3 upsampleData;
#ifndef WITHOUT_LDS
    // Fill the sample data
    FillCloudUpscaleNeighborhoodData(groupThreadId.xy, subRegionIndex, upsampleData);
#else
    // Fill the sample data
    FillCloudUpscaleNeighborhoodData_NOLDS(halfResCoord, subRegionIndex, upsampleData);
#endif

    // Evaluate if the neighborhood is usable. closestNeighbor is only here to satisfy the
    // signature, the cloud depth is evaluated from the whole neighborhood below.
    int closestNeighbor;
    bool rejectedNeighborhood;
    OverrideMaskValues(highDepth, upsampleData, rejectedNeighborhood, closestNeighbor);

    // Do the bilateral upscale
    float4 currentClouds = BilUpColor3x3(highDepth, upsampleData);

    // Fall back to "no clouds" if no pixel of the neighborhood could be used
    if (rejectedNeighborhood)
        currentClouds = float4(0.0, 0.0, 0.0, 1.0);

    // Make sure the transmittance doesn't go outside of the [0, 1] interval
    currentClouds.w = saturate(currentClouds.w);

    // Due to numerical precision issues, upscaling a bunch of 1.0 can lead to a slightly lower number, this fixes it.
    if (EvaluateRegionEmptiness(upsampleData) == 1.0)
        currentClouds = float4(0, 0, 0, 1);

    // We cannot simply pick the low res depth that resembles the most the full resolution pixel
    // Two cases are possible:
    // - The final pixel doesn't have any clouds. In this case, we don't care about the depth.
    // - The final pixel has some amount of clouds (which doesn't necessarily map to any specific pixel in the neighborhood as it has been interpolated).
    // In the second case we need to take the average depth (in linear space) of the contributing pixels (the ones that have some clouds in them).
#ifdef WITHOUT_LDS
    float cloudDepth = currentClouds.w != 1.0 ? EvaluateUpscaledCloudDepth_NOLDS(halfResCoord, upsampleData) : UNITY_RAW_FAR_CLIP_VALUE;
#else
    float cloudDepth = currentClouds.w != 1.0 ? EvaluateUpscaledCloudDepth(groupThreadId, upsampleData) : UNITY_RAW_FAR_CLIP_VALUE;
#endif

#if defined(PERCEPTUAL_TRANSMITTANCE)
    // Estimate the transmittance that shall be used
    float4 currentColor = _CameraColorTexture[COORD_TEXTURE2D_X(finalCoord.xy)];
    float finalTransmittance = EvaluateFinalTransmittance(currentColor.rgb, currentClouds.w);
#else
    // NOTE(review): despite its name, _CubicTransmittance squares the transmittance here - confirm intent.
    float finalTransmittance = _CubicTransmittance ? currentClouds.a * currentClouds.a : currentClouds.a;
#endif

    // Store the upscaled result only, composite in later pass.
    _VolumetricCloudsLightingTextureRW[COORD_TEXTURE2D_X(finalCoord.xy)] = float4(currentClouds.xyz, finalTransmittance);
    _VolumetricCloudsDepthTextureRW[COORD_TEXTURE2D_X(finalCoord.xy)] = cloudDepth;
}

// Full resolution path (no upscale needed): forwards the clouds lighting with the final
// transmittance in alpha, and the cloud depth stored in the z channel of _DepthStatusTexture.
[numthreads(8, 8, 1)]
void COMBINE_CLOUDS(uint3 finalCoord : SV_DispatchThreadID, int groupIndex : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(finalCoord.z);

    // If out of bounds, leave right away
    if (any(finalCoord.xy >= uint2(_FinalScreenSize.xy)))
        return;

    // Read the clouds lighting of the pixel
    float4 currentClouds = LOAD_TEXTURE2D_X(_VolumetricCloudsTexture, finalCoord.xy);

    // Make sure the transmittance doesn't go outside of the [0, 1] interval
    currentClouds.w = saturate(currentClouds.w);

    // Read the cloud depth of the pixel
    float cloudsDepth = LOAD_TEXTURE2D_X(_DepthStatusTexture, finalCoord.xy).z;

#if defined(PERCEPTUAL_TRANSMITTANCE)
    // Estimate the transmittance that shall be used
    float4 currentColor = _CameraColorTexture[COORD_TEXTURE2D_X(finalCoord.xy)];
    float finalTransmittance = EvaluateFinalTransmittance(currentColor.rgb, currentClouds.w);
#else
    // NOTE(review): despite its name, _CubicTransmittance squares the transmittance here - confirm intent.
    float finalTransmittance = _CubicTransmittance ? currentClouds.a * currentClouds.a : currentClouds.a;
#endif

    // Store the result only, composite in later pass.
    _VolumetricCloudsLightingTextureRW[COORD_TEXTURE2D_X(finalCoord.xy)] = float4(currentClouds.xyz, finalTransmittance);
    _VolumetricCloudsDepthTextureRW[COORD_TEXTURE2D_X(finalCoord.xy)] = cloudsDepth;
}