#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/ExposureCommon.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma kernel KFixedExposure
#pragma kernel KManualCameraExposure
#pragma kernel KPrePass
#pragma kernel KReduction
#pragma kernel KReset

TEXTURE2D(_InputTexture);

#define PREPASS_TEX_SIZE 1024.0
#define PREPASS_TEX_HALF_SIZE 512.0

//#pragma enable_d3d11_debug_symbols

//
// Fixed exposure
// Nothing fancy here: takes the EV100 value supplied through the parameters, applies the
// exposure compensation and writes (linear exposure, EV100) to the output texture.
//
[numthreads(1,1,1)]
void KFixedExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const float compensatedEV100 = ParamEV100 - ParamExposureCompensation;
    const float linearExposure = ConvertEV100ToExposure(compensatedEV100, LensImperfectionExposureScale);

    _OutputTexture[dispatchThreadId] = float2(linearExposure, compensatedEV100);
}

//
// Manual camera exposure
// Derives EV100 from aperture / shutter speed / ISO, applies the exposure compensation and
// writes (linear exposure, EV100) to the output texture.
//
[numthreads(1,1,1)]
void KManualCameraExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const float cameraEV100 = ComputeEV100(ParamAperture, ParamShutterSpeed, ParamISO);
    const float compensatedEV100 = cameraEV100 - ParamExposureCompensation;
    const float linearExposure = ConvertEV100ToExposure(compensatedEV100, LensImperfectionExposureScale);

    _OutputTexture[dispatchThreadId] = float2(linearExposure, compensatedEV100);
}

//
// Average luminance pre-pass
// Converts the input to log luminance in a square power-of-two target
// (PREPASS_TEX_SIZE x PREPASS_TEX_SIZE). Each output texel holds (log luminance, weight).
//
[numthreads(8,8,1)]
void KPrePass(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // In XR, single-pass views are interleaved in a checkerboard pattern
    UNITY_XR_ASSIGN_VIEW_INDEX((dispatchThreadId.x + dispatchThreadId.y) % _XRViewCount)

    PositionInputs texelInputs = GetPositionInput(float2(dispatchThreadId), rcp(PREPASS_TEX_SIZE), uint2(8u, 8u));
    float2 sampleUV = ClampAndScaleUVForBilinear(texelInputs.positionNDC);
    float sceneLuma = SampleLuminance(sampleUV);

    // The luminance is floored at 1e-4 before the EV100 conversion; the weight is computed
    // from the unclamped value.
    float sampleEV100 = ComputeEV100FromAvgLuminance(max(sceneLuma, 1e-4), MeterCalibrationConstant);
    float sampleWeight = WeightSample(dispatchThreadId, PREPASS_TEX_SIZE.xx, sceneLuma);

    _OutputTexture[texelInputs.positionSS] = float2(sampleEV100, sampleWeight);
}

//
// Average luminance 2nd & 3rd pass + Evaluation
// - 2nd: Reduction 1024 -> 32
// - 3rd: Reduction 32 -> 1
//

#define REDUCTION_GROUP_SIZE 16
#define REDUCTION_TOTAL_THREADS 256

groupshared float4 gs_luminances[REDUCTION_TOTAL_THREADS];
groupshared float gs_weights[REDUCTION_TOTAL_THREADS];

// The reduction kernel runs twice. Its final output is the normalized weighted average of the
// log luminances stored in the texture produced by the pre-pass.
//
// To follow the math, take a simplified case: a 1D texture instead of a 2D one, and a
// 4 -> 2 -> 1 reduction instead of 1024 -> 32 -> 1.
//
// Let the input texture contain four pixels: (a, A), (b, B), (c, C), (d, D), where the first
// channel is the log luminance and the second channel is the weight.
//
// The first pass merges two pixels per thread and produces this two-pixel, two-channel
// intermediate texture:
//     ((a*A + b*B) / (A + B), (A + B)), ((c*C + d*D) / (C + D), (C + D))
// The second pass then evaluates:
//     ((a*A + b*B) / (A + B) * (A + B) + (c*C + d*D) / (C + D) * (C + D)) / (A + B + C + D)
// which reduces to:
//     (a*A + b*B + c*C + d*D) / (A + B + C + D)
// i.e. the normalized weighted average of the log luminances. The weights therefore do not
// need to sum to 1.
//
// Note that the division by (A + B) in the first pass is undone by the multiplication with
// the stored weight (A + B) in the second pass. There are two reasons for doing it this way:
// it allows the reduction to run in parallel, and it keeps the intermediate texture values in
// a range that fits the fp16 format. It costs a little more ALU but avoids fp16 quantization
// artifacts.
//
// Weighted reduction of (value, weight) texels, optionally followed by exposure evaluation.
// Every thread reads a 2x2 quad from _InputTexture; the group then sums the weighted values
// and the weights through group-shared memory, and thread 0 writes one texel at groupId.
//
[numthreads(REDUCTION_GROUP_SIZE,REDUCTION_GROUP_SIZE,1)]
void KReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    uint flatThreadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    uint2 quadOrigin = (groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy) * 2u;

    // Fetch the 2x2 quad owned by this thread: .x is the value, .y is its weight
    float2 texel00 = _InputTexture[quadOrigin + uint2(0u, 0u)].xy;
    float2 texel10 = _InputTexture[quadOrigin + uint2(1u, 0u)].xy;
    float2 texel01 = _InputTexture[quadOrigin + uint2(0u, 1u)].xy;
    float2 texel11 = _InputTexture[quadOrigin + uint2(1u, 1u)].xy;

    float4 quadValues = float4(texel00.x, texel10.x, texel01.x, texel11.x);
    float4 quadWeights = float4(texel00.y, texel10.y, texel01.y, texel11.y);

    // Seed the LDS with the weighted values and the summed weights of the quad
    gs_luminances[flatThreadIdx] = quadValues * quadWeights;
    gs_weights[flatThreadIdx] = dot(quadWeights, 1.0);

    GroupMemoryBarrierWithGroupSync();

    // Halve the active range each step, folding the upper half onto the lower half
    UNITY_UNROLL
    for (uint stride = REDUCTION_TOTAL_THREADS / 2u; stride > 0u; stride >>= 1u)
    {
        if (flatThreadIdx < stride)
        {
            gs_luminances[flatThreadIdx] += gs_luminances[flatThreadIdx + stride];
            gs_weights[flatThreadIdx] += gs_weights[flatThreadIdx + stride];
        }

        GroupMemoryBarrierWithGroupSync();
    }

    // Only the first thread of the group evaluates and writes the result
    if (flatThreadIdx == 0u)
    {
        float weightSum = gs_weights[0];

        // The 0.25 factor here and on the weight sum below cancel out in the division
        float weightedAvg = dot(gs_luminances[0], 0.25);

        // Replace non-finite sums with 1.0 before normalizing
        if (IsNaN(weightedAvg) || IsInf(weightedAvg))
            weightedAvg = 1.0;

        // Skip normalization when the group carries no weight
        if (weightSum > 0.0)
            weightedAvg /= (weightSum * 0.25);

        // Passthrough (average, weight sum) unless an evaluation mode below overrides it.
        // The passthrough is only used when going from 1024 to 32.
        float2 result = float2(weightedAvg, weightSum);

        UNITY_BRANCH
        switch (ParamEvaluateMode)
        {
            case 1u:
            {
                // Automatic
                float targetEV = AdaptExposure(weightedAvg - ParamExposureCompensation);
                targetEV = clamp(targetEV, ParamExposureLimitMin, ParamExposureLimitMax);
                result = float2(ConvertEV100ToExposure(targetEV, LensImperfectionExposureScale), targetEV);
                break;
            }
            case 2u:
            {
                // Curve remapping
                float limitMin = ParamExposureLimitMin;
                float limitMax = ParamExposureLimitMax;
                float targetEV = CurveRemap(weightedAvg, limitMin, limitMax);
                targetEV = AdaptExposure(targetEV - ParamExposureCompensation);
                targetEV = clamp(targetEV, limitMin, limitMax);
                result = float2(ConvertEV100ToExposure(targetEV, LensImperfectionExposureScale), targetEV);
                break;
            }
            default:
            {
                // No evaluation - keep the passthrough value for the next pass
                break;
            }
        }

        _OutputTexture[groupId.xy] = result;
    }
}

//
// Reset the exposure texture to a default state (1,0)
//
[numthreads(1, 1, 1)]
void KReset(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const float2 defaultState = float2(1.0, 0.0);
    _OutputTexture[dispatchThreadId] = defaultState;
}