UnityCACAO-HDRP17/com.unity.render-pipelines..../Runtime/Lighting/LightLoop/HDShadowLoop.hlsl


								#ifndef UNITY_HD_SHADOW_LOOP_HLSL

								#define UNITY_HD_SHADOW_LOOP_HLSL


								// Optional compile-time switches selecting how ShadowLoopMin combines the per-light shadow terms.

								// With neither defined, the per-channel minimum of all terms is kept.

								// If both are defined, the #ifdef / #elif chains in ShadowLoopMin pick SHADOW_LOOP_MULTIPLY.

								//#define SHADOW_LOOP_MULTIPLY

								//#define SHADOW_LOOP_AVERAGE


								// Both alternative modes count the contributing lights; SHADOW_LOOP_WEIGHT enables that counter.

								#if defined(SHADOW_LOOP_MULTIPLY) || defined(SHADOW_LOOP_AVERAGE)

								#define SHADOW_LOOP_WEIGHT

								#endif


								// Gathers the shadow terms of the lights affecting one surface point and folds them into a single float3.

								// Directional, punctual and area lights are visited in that order, each section gated by its bit in featureFlags.

								// How the individual terms are combined is fixed at compile time:

								//   - default:              per-channel minimum of every evaluated shadow value.

								//   - SHADOW_LOOP_MULTIPLY: product of lerp(shadowTint, 1, shadowValue) over the contributing lights.

								//   - SHADOW_LOOP_AVERAGE:  mean of lerp(shadowTint, 1, shadowValue) over the contributing lights.

								// In every mode the result is (1, 1, 1) when no light contributed.

								//

								// shadowContext : forwarded untouched to the Get*ShadowAttenuation calls.

								// posInput      : positionWS and positionSS are read; positionWS is first passed to ApplyCameraRelativeXR.

								// normalWS      : forwarded to the Get*ShadowAttenuation calls.

								// featureFlags  : bitmask tested against LIGHTFEATUREFLAGS_DIRECTIONAL / _PUNCTUAL / _AREA.

								// renderLayer   : matched against each punctual / area light's lightLayers (the directional light is not layer-tested here).

								// shadow        : [out] the combined shadow term.

								void ShadowLoopMin(HDShadowContext shadowContext, PositionInputs posInput, float3 normalWS, uint featureFlags, uint renderLayer,

								                        out float3 shadow)

								{

								    // Number of lights that contributed a term; only the multiply / average modes need it.

								#ifdef SHADOW_LOOP_WEIGHT

								    float shadowCount = 0.0f;

								#endif


								    // Start from the neutral element of the selected combination:

								    // 1 for multiply and for min, 0 for the running sum used by average.

								#ifdef SHADOW_LOOP_MULTIPLY

								    shadow = float3(1, 1, 1);

								#elif defined(SHADOW_LOOP_AVERAGE)

								    shadow = float3(0, 0, 0);

								#else

								    shadow = float3(1, 1, 1);

								#endif


								    // With XR single-pass and camera-relative: offset position to do lighting computations from the combined center view (original camera matrix).

								    // This is required because there is only one list of lights generated on the CPU. Shadows are also generated once and shared between the instanced views.

								    ApplyCameraRelativeXR(posInput.positionWS);


								    // NOTE(review): contact shadows are not evaluated anywhere in this function.


								    // First of all we compute the shadow value of the directional light to reduce the VGPR pressure

								    if (featureFlags & LIGHTFEATUREFLAGS_DIRECTIONAL)

								    {

								        // Evaluate sun shadows.

								        // Only the single light designated by _DirectionalShadowIndex is considered; a negative index skips the section.

								        if (_DirectionalShadowIndex >= 0)

								        {

								            DirectionalLightData light = _DirectionalLightDatas[_DirectionalShadowIndex];


								            // TODO: this will cause us to load from the normal buffer first. Does this cause a performance problem?

								            float3 wi = -light.forward;


								            // Is it worth sampling the shadow map?

								            if (light.lightDimmer > 0 && light.shadowDimmer > 0)

								            {

								                SHADOW_TYPE shadowD = 1.0;

								                // When screen-space shadows are compiled in and the surface is not transparent, a light with a valid

								                // screenSpaceShadowIndex reads its shadow through GetScreenSpaceColorShadow.

								                // The trailing 'else' lives inside the #if, so when that block is compiled out the braces below

								                // become an unconditional scope and GetDirectionalShadowAttenuation is always called.

								#if defined(SCREEN_SPACE_SHADOWS_ON) && !defined(_SURFACE_TYPE_TRANSPARENT)

								                if ((light.screenSpaceShadowIndex & SCREEN_SPACE_SHADOW_INDEX_MASK) != INVALID_SCREEN_SPACE_SHADOW)

								                {

								                    shadowD = GetScreenSpaceColorShadow(posInput, light.screenSpaceShadowIndex).SHADOW_TYPE_SWIZZLE;

								                }

								                else

								#endif

								                {

								                    shadowD = GetDirectionalShadowAttenuation(shadowContext, posInput.positionSS, posInput.positionWS, normalWS, light.shadowIndex, wi);

								                }


								                // Fold the directional term into the accumulator.

								                // NOTE(review): unlike the punctual path, shadowD is not remapped by light.shadowDimmer before being combined - confirm this is intended.

								#ifdef SHADOW_LOOP_MULTIPLY

								                shadow *= lerp(light.shadowTint, float3(1, 1, 1), shadowD);

								#elif defined(SHADOW_LOOP_AVERAGE)

								                shadow += lerp(light.shadowTint, float3(1, 1, 1), shadowD);

								#else

								                shadow = min(shadow, shadowD.SHADOW_TYPE_SWIZZLE);

								#endif

								#ifdef SHADOW_LOOP_WEIGHT

								                shadowCount += 1.0f;

								#endif

								            }

								        }

								    }


								    if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)

								    {

								        uint lightCount, lightStart;


								        // Light range to visit: taken from GetCountAndStart for this pixel, or the whole punctual list

								        // (starting at 0) when tile/cluster lookup is disabled.

								#ifndef LIGHTLOOP_DISABLE_TILE_AND_CLUSTER

								        GetCountAndStart(posInput, LIGHTCATEGORY_PUNCTUAL, lightStart, lightCount);

								#else   // LIGHTLOOP_DISABLE_TILE_AND_CLUSTER

								        lightCount = _PunctualLightCount;

								        lightStart = 0;

								#endif


								        // IsFastPath returns whether the fast path applies and writes lightStartLane0; when it applies,

								        // lightStart is replaced by lightStartLane0.

								        // NOTE(review): presumably lightStartLane0 is the first lane's lightStart - confirm in IsFastPath.

								        bool fastPath = false;

								        uint lightStartLane0;

								        fastPath = IsFastPath(lightStart, lightStartLane0);


								        if (fastPath)

								        {

								            lightStart = lightStartLane0;

								        }


								        // Scalarized loop. All lights that are in a tile/cluster touched by any pixel in the wave are loaded (scalar load), only the one relevant to current thread/pixel are processed.

								        // For clarity, the following code will follow the convention: variables starting with s_ are meant to be wave uniform (meant for scalar register),

								        // v_ are variables that might have different value for each thread in the wave (meant for vector registers).

								        // This will perform more loads than it is supposed to, however, the benefits should offset the downside, especially given that light data accessed should be largely coherent.

								        // Note that the above is valid only if wave intrinsics are supported.

								        uint v_lightListOffset = 0;

								        uint v_lightIdx = lightStart;


								#if NEED_TO_CHECK_HELPER_LANE

								        // On some platform helper lanes don't behave as we'd expect, therefore we prevent them from entering the loop altogether.

								        // IMPORTANT! This has implications if ddx/ddy is used on results derived from lighting, however given Lightloop is called in compute we should be

								        // sure it will not happen.

								        bool isHelperLane = WaveIsHelperLane();

								        while (!isHelperLane && v_lightListOffset < lightCount)

								#else

								        while (v_lightListOffset < lightCount)

								#endif

								        {

								            v_lightIdx = FetchIndex(lightStart, v_lightListOffset);

								            uint s_lightIdx = ScalarizeElementIndex(v_lightIdx, fastPath);

								            // s_lightIdx is a uint, so this comparison relies on -1 being converted to the all-ones value.

								            // NOTE(review): assumes ScalarizeElementIndex returns that value as its "nothing left" sentinel - confirm.

								            if (s_lightIdx == -1)

								                break;


								            LightData s_lightData = FetchLight(s_lightIdx);


								            // If current scalar and vector light index match, we process the light. The v_lightListOffset for current thread is increased.

								            // Note that the following should really be ==, however, since helper lanes are not considered by WaveActiveMin, such helper lanes could

								            // end up with a unique v_lightIdx value that is smaller than s_lightIdx hence being stuck in a loop. All the active lanes will not have this problem.

								            if (s_lightIdx >= v_lightIdx)

								            {

								                v_lightListOffset++;

								                // Only lights on a matching layer that own a shadow (shadowIndex >= 0) with a non-zero shadowDimmer are evaluated.

								                if (IsMatchingLightLayer(s_lightData.lightLayers, renderLayer) &&

								                    s_lightData.shadowIndex >= 0 &&

								                    s_lightData.shadowDimmer > 0)

								                {

								                    float shadowP;

								                    float3 L;

								                    float4 distances; // {d, d^2, 1/d, d_proj}

								                    GetPunctualLightVectors(posInput.positionWS, s_lightData, L, distances);


								                    // Projector lights (box, pyramid) always have cookies, so we can perform clipping inside the if().

								                    // lightinBounds stays 1 for every other light type.

								                    float lightinBounds = 1.0;

								                    if (s_lightData.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID || s_lightData.lightType == GPULIGHTTYPE_PROJECTOR_BOX)

								                    {

								                        // Express the sample position in the light's (right, up, forward) frame.

								                        float3 lightToSample = posInput.positionWS - s_lightData.positionRWS;

								                        float3x3 lightToWorld = float3x3(s_lightData.right, s_lightData.up, s_lightData.forward);

								                        float3 positionLS   = mul(lightToSample, transpose(lightToWorld));


								                        // Perform orthographic or perspective projection.

								                        float  perspectiveZ = (s_lightData.lightType != GPULIGHTTYPE_PROJECTOR_BOX) ? positionLS.z : 1.0;

								                        float2 positionCS   = positionLS.xy / perspectiveZ;


								                        float z = positionLS.z;

								                        float r = s_lightData.range;


								                        // Box lights have no range attenuation, so we must clip manually.

								                        // The third term equals 1 at z == 0 and at z == r, and is below 1 in between.

								                        lightinBounds = Max3(abs(positionCS.x), abs(positionCS.y), abs(z - 0.5 * r) - 0.5 * r + 1) <= s_lightData.boxLightSafeExtent ?  1 : 0;

								                    }


								                    // The shadow is only evaluated where the light can reach the sample: inside its range, with a

								                    // non-zero PunctualLightAttenuation, and inside the projector bounds.

								                    // NOTE(review): the extra L.y > 0.0 test requires the Y component of L (as returned by

								                    // GetPunctualLightVectors) to be positive; its rationale is not visible in this file - confirm it is intended.

								                    if (distances.x < s_lightData.range

								                        && PunctualLightAttenuation(distances, s_lightData.rangeAttenuationScale, s_lightData.rangeAttenuationBias, s_lightData.angleScale, s_lightData.angleOffset) > 0.0

								                        && lightinBounds > 0.0

								                        && L.y > 0.0)

								                    {

								                        // Same screen-space / shadow-map selection pattern as the directional light above.

								#if defined(SCREEN_SPACE_SHADOWS_ON) && !defined(_SURFACE_TYPE_TRANSPARENT)

								                        if ((s_lightData.screenSpaceShadowIndex & SCREEN_SPACE_SHADOW_INDEX_MASK) != INVALID_SCREEN_SPACE_SHADOW)

								                        {

								                            shadowP = GetScreenSpaceShadow(posInput, s_lightData.screenSpaceShadowIndex);

								                        }

								                        else

								#endif

								                        {

								                            shadowP = GetPunctualShadowAttenuation(shadowContext, posInput.positionSS, posInput.positionWS, normalWS, s_lightData.shadowIndex, L, distances.x, s_lightData.lightType == GPULIGHTTYPE_POINT, s_lightData.lightType != GPULIGHTTYPE_PROJECTOR_BOX);

								                            // When nonLightMappedOnly is set the value is capped at 1; otherwise it is kept as returned.

								                            shadowP = s_lightData.nonLightMappedOnly ? min(1.0f, shadowP) : shadowP;

								                        }

								                        // shadowDimmer blends between no shadow (1) and the evaluated shadow.

								                        shadowP = lerp(1.0f, shadowP, s_lightData.shadowDimmer);


								#ifdef SHADOW_LOOP_MULTIPLY

								                        shadow *= lerp(s_lightData.shadowTint, float3(1, 1, 1), shadowP);

								#elif defined(SHADOW_LOOP_AVERAGE)

								                        shadow += lerp(s_lightData.shadowTint, float3(1, 1, 1), shadowP);

								#else

								                        shadow = min(shadow, shadowP.xxx);

								#endif

								#ifdef SHADOW_LOOP_WEIGHT

								                        shadowCount += 1.0f;

								#endif

								                    }

								                }

								            }

								        }

								    }


								    if (featureFlags & LIGHTFEATUREFLAGS_AREA)

								    {

								        uint lightCount, lightStart;


								        // Without tile/cluster lookup, area lights are read starting at offset _PunctualLightCount.

								    #ifndef LIGHTLOOP_DISABLE_TILE_AND_CLUSTER

								        GetCountAndStart(posInput, LIGHTCATEGORY_AREA, lightStart, lightCount);

								    #else

								        lightCount = _AreaLightCount;

								        lightStart = _PunctualLightCount;

								    #endif


								        // COMPILER BEHAVIOR WARNING!

								        // If rectangle lights are before line lights, the compiler will duplicate light matrices in VGPR because they are used differently between the two types of lights.

								        // By keeping line lights first we avoid this behavior and save substantial register pressure.

								        // TODO: This is based on the current Lit.shader and can be different for any other way of implementing area lights, how to be generic and ensure performance ?


								        uint i;

								        if (lightCount > 0)

								        {

								            i = 0;


								            uint      last      = lightCount - 1;

								            LightData lightData = FetchLight(lightStart, i);


								            // Skip the leading tube lights: they contribute no shadow term here.

								            // min(++i, last) keeps the fetch offset inside the list even after i has moved past the end.

								            while (i <= last && lightData.lightType == GPULIGHTTYPE_TUBE)

								            {

								                lightData = FetchLight(lightStart, min(++i, last));

								            }


								            // Every remaining light is handled as a rectangle light.

								            while (i <= last) // GPULIGHTTYPE_RECTANGLE

								            {

								                lightData.lightType = GPULIGHTTYPE_RECTANGLE; // Enforce constant propagation


								                float shadowArea = 1.0f;


								                // If the point to shade is in the positive hemisphere of the area light, we can read the shadow.

								                if (dot(lightData.forward, posInput.positionWS) > dot(lightData.forward, lightData.positionRWS))

								                {

								                    if (IsMatchingLightLayer(lightData.lightLayers, renderLayer))

								                    {

								#if defined(SCREEN_SPACE_SHADOWS_ON) && !defined(_SURFACE_TYPE_TRANSPARENT)

								                        if ((lightData.screenSpaceShadowIndex & SCREEN_SPACE_SHADOW_INDEX_MASK) != INVALID_SCREEN_SPACE_SHADOW)

								                        {

								                            shadowArea = GetScreenSpaceShadow(posInput, lightData.screenSpaceShadowIndex);

								                        }

								                        else

								#endif

								                        {

								                            float3 L;

								                            float4 distances; // {d, d^2, 1/d, d_proj}

								                            GetPunctualLightVectors(posInput.positionWS, lightData, L, distances);

								                            // NOTE(review): lightRadSqr and shadowP below are never read in this scope.

								                            float lightRadSqr = lightData.size.x;

								                            float shadowP;


								                            // coef stays 0 (no shadow lookup) unless the attenuation below is evaluated.

								                            float coef = 0.0f;

								                            float3 unL = lightData.positionRWS - posInput.positionWS;

								                            // Mathematically already implied by the enclosing hemisphere test (assuming FLT_EPS is positive).

								                            if (dot(lightData.forward, unL) < FLT_EPS)

								                            {

								                                // Rotate unL into the light's (right, up, -forward) frame.

								                                float3x3 lightToWorld = float3x3(lightData.right, lightData.up, -lightData.forward);

								                                unL = mul(unL, transpose(lightToWorld));


								                                float halfWidth   = lightData.size.x*0.5;

								                                float halfHeight  = lightData.size.y*0.5;


								                                // Reciprocal half-extents: the light's range, widened by the rectangle's half size along its two in-plane axes.

								                                float  range      = lightData.range;

								                                float3 invHalfDim = rcp(float3(range + halfWidth,

								                                                               range + halfHeight,

								                                                               range));


								                                coef = EllipsoidalDistanceAttenuation(unL, invHalfDim,

								                                                                           lightData.rangeAttenuationScale,

								                                                                           lightData.rangeAttenuationBias);

								                            }


								                            // NOTE(review): the direction and distance passed below are derived from lightData.positionRWS alone,

								                            // not from the light-to-sample vector (L / distances / unL) - confirm this is what

								                            // GetRectAreaShadowAttenuation expects.

								                            // NOTE(review): unlike the punctual path, neither shadowIndex >= 0 nor shadowDimmer is checked before sampling.

								                            if (distances.x < lightData.range && coef > 0.0)

								                            {

								                                shadowArea = GetRectAreaShadowAttenuation(shadowContext, posInput.positionSS, posInput.positionWS, normalWS, lightData.shadowIndex, normalize(lightData.positionRWS), length(lightData.positionRWS));

								                            }

								                        }

								                    }


								                    // This runs for every rectangle light facing the sample, including lights rejected by the layer test

								                    // or the range test (shadowArea is then still 1).

								                    // NOTE(review): such lights still increment shadowCount, which lowers their weight in the average mode - confirm intended.

								#ifdef SHADOW_LOOP_MULTIPLY

								                    shadow *= lerp(lightData.shadowTint, float3(1, 1, 1), shadowArea);

								#elif defined(SHADOW_LOOP_AVERAGE)

								                    shadow += lerp(lightData.shadowTint, float3(1, 1, 1), shadowArea);

								#else

								                    shadow = min(shadow, shadowArea.xxx);

								#endif

								#ifdef SHADOW_LOOP_WEIGHT

								                    shadowCount += 1.0f;

								#endif

								                }


								                // Advance to the next light; the offset is clamped so the final fetch stays inside the list.

								                lightData = FetchLight(lightStart, min(++i, last));

								            }

								        }

								    }

								    // Finalize: multiply resets to (1, 1, 1) when nothing contributed; average divides the running sum by the

								    // number of contributing lights, or yields (1, 1, 1) when there were none. The min mode needs no post-processing.

								#ifdef SHADOW_LOOP_MULTIPLY

								    if (shadowCount == 0.0f)

								    {

								        shadow = float3(1, 1, 1);

								    }

								#elif defined(SHADOW_LOOP_AVERAGE)

								    if (shadowCount > 0.0f)

								    {

								        shadow /= shadowCount;

								    }

								    else

								    {

								        shadow = float3(1, 1, 1);

								    }

								#endif

								}


								#endif