Laurens-Packages/Packages/com.unity.render-pipelines..../Runtime/Lighting/LightLoop/lightlistbuild-bigtile.compute


								#pragma kernel BigTileLightListGen


								#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"

								#include "Packages/com.unity.render-pipelines.high-definition-config/Runtime/ShaderConfig.cs.hlsl"


								#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.hlsl"

								#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl"

								#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightingConvexHullUtils.hlsl"

								#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/SortingComputeUtils.hlsl"

								#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightCullUtils.hlsl"

								#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

								// Compiles a second variant that also fills g_vVolumetricLightList (see the end of BigTileLightListGen).
								#pragma multi_compile _ GENERATE_VOLUMETRIC_BIGTILE

								// Enables the CullByExactEdgeTests() refinement pass and its helpers.
								#define EXACT_EDGE_TESTS
								// Enables the SphericalIntersectionTests() refinement pass.
								#define PERFORM_SPHERICAL_INTERSECTION_TESTS

								// Selects the full inverse-projection branch inside GetLinearDepth().
								// Nothing visible in this file calls GetLinearDepth(), so this define is not
								// actually used for anything in this kernel.
								#define USE_OBLIQUE_MODE

								// Capacity of the per-tile light list: one slot of the PLUS_ONE budget is reserved for the light count.
								#define MAX_NR_BIGTILE_LIGHTS               (MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE-1)

								// Screen-space AABB bounds per light; the .xy of the min/max entries are compared against normalized tile corners.
								StructuredBuffer<float4> g_vBoundsBuffer : register( t1 );
								// Per-light volume data; this file reads featureFlags, affectVolumetric and lightVolume.
								StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
								// Per-light bounds; this file reads center, radius, boxAxisX/Y/Z and scaleXY.
								StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

								#ifdef PLATFORM_LANE_COUNT      // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases.
								#define NR_THREADS              PLATFORM_LANE_COUNT
								#else
								#define NR_THREADS              64                                  // default to 64 threads per group on other platforms..
								#endif

								// output buffer
								// Each uint packs two 16-bit entries; the low half of a tile's first word holds the light count.
								RWStructuredBuffer<uint> g_vLightList : register( u0 );     // don't support RWBuffer yet in unity

								#if defined(GENERATE_VOLUMETRIC_BIGTILE)
								// Output buffer for volumetric big tiles
								// Written with the same per-tile offset and 16-bit packing as g_vLightList.
								RWStructuredBuffer<uint> g_vVolumetricLightList;

								// Number of volumetric lights kept in each thread's bucket of lightsListLDS.
								groupshared unsigned int volumetricLightCounts[NR_THREADS];

								#endif

								// 2kB (room for roughly 30 wavefronts)
								// NOTE(review): the 2kB figure assumes MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE == 512 -- TODO confirm against LightLoop.cs.hlsl.
								// Working light list of the tile; culled entries are overwritten with UINT_MAX.
								groupshared unsigned int lightsListLDS[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE];
								// Shared counter: first the append cursor for the coarse list, later the count of surviving lights.
								groupshared uint lightOffs;


								// TODO: Remove this function and g_mInvScrProjectionArr from constants.
								// Only usage of that constant.
								//
								// Converts a depth-buffer-space z value at a pixel into linear depth using the
								// inverse screen projection of the given eye.
								//   pixXY        : pixel coordinate fed to the inverse projection
								//   zDptBufSpace : depth-buffer-space z (0 is near 1 is far)
								//   eyeIndex     : index into g_mInvScrProjectionArr
								float GetLinearDepth(float2 pixXY, float zDptBufSpace, uint eyeIndex)
								{
								    const float4x4 invScrProj = g_mInvScrProjectionArr[eyeIndex];

								#ifdef USE_OBLIQUE_MODE
								    // Full unprojection followed by the perspective divide of z by w.
								    const float4 unprojected = mul(invScrProj, float4(pixXY, zDptBufSpace, 1.0));
								    return unprojected.z / unprojected.w;
								#else
								    // for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
								    // however this function must also work for orthographic projection so we keep it like this.
								    const float4 rowZ = invScrProj[2];
								    const float4 rowW = invScrProj[3];

								    const float numerator   = rowZ.z*zDptBufSpace+rowZ.w;
								    const float denominator = rowW.z*zDptBufSpace+rowW.w;
								    return numerator / denominator;
								#endif
								}


								// Reconstructs a view-space position from a screen position and a linear depth,
								// using the screen projection of the given eye.
								//   v2ScrPos  : screen-space position (pixel units, as passed by the callers in this file)
								//   fLinDepth : linear view-space depth, returned unchanged as the z component
								//   eyeIndex  : index into g_mScrProjectionArr
								float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)
								{
								    const float4x4 scrProj = g_mScrProjectionArr[eyeIndex];
								    const float4 projRowX = scrProj[0];
								    const float4 projRowY = scrProj[1];

								    const bool orthoCamera = g_isOrthographic!=0;

								    // Scale comes from the diagonal; the offset lives in column w for orthographic
								    // projections and in column z for perspective ones.
								    const float2 scale  = float2(projRowX.x, projRowY.y);
								    const float2 offset = orthoCamera ? float2(projRowX.w, projRowY.w) : float2(projRowX.z, projRowY.z);

								#if USE_LEFT_HAND_CAMERA_SPACE
								    const bool leftHanded = true;
								#else
								    // Orthographic cameras always take the left-handed formulation.
								    const bool leftHanded = orthoCamera;
								#endif

								    const float handedness = leftHanded ? 1 : (-1);
								    float2 viewXY = (handedness*v2ScrPos-offset)/scale;

								    // Perspective: xy grow linearly with depth. Orthographic: xy are depth independent.
								    if (!orthoCamera)
								    {
								        viewXY = fLinDepth*viewXY;
								    }

								    return float3(viewXY, fLinDepth);
								}


								// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the diagonal
								// scale terms of the screen projection of the given eye, i.e. the diagonal extent
								// of one pixel at unit depth.
								float GetOnePixDiagWorldDistAtDepthOne(uint eyeIndex)
								{
								    const float4x4 scrProj = g_mScrProjectionArr[eyeIndex];
								    const float2 pixelExtent = float2(1.0/scrProj[0].x, 1.0/scrProj[1].y);
								    return length(pixelExtent);
								}


								// Forward declarations: both refinement passes are defined after the kernel entry
								// point that calls them, and each exists only when its feature define is set.
								#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
								// Marks coarse lights whose bounding sphere misses the tile (entry set to UINT_MAX in lightsListLDS).
								void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate, uint eyeIndex);
								#endif

								#ifdef EXACT_EDGE_TESTS
								// Marks coarse lights separated from the tile frustum by an edge-pair plane (entry set to UINT_MAX in lightsListLDS).
								void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, uint eyeIndex);
								#endif


								// Returns true when the light at lightIndex in _LightVolumeData has its
								// affectVolumetric field set and its featureFlags equal exactly the punctual or
								// the directional feature flag.
								bool LightAffectVolumetric(uint lightIndex)
								{
								    // Lights that opted out of volumetrics are rejected regardless of their shape.
								    if (_LightVolumeData[lightIndex].affectVolumetric == 0)
								    {
								        return false;
								    }

								    const uint featureFlags = _LightVolumeData[lightIndex].featureFlags;
								    const bool isPunctual    = (featureFlags == LIGHTFEATUREFLAGS_PUNCTUAL);
								    const bool isDirectional = (featureFlags == LIGHTFEATUREFLAGS_DIRECTIONAL);
								    return isPunctual || isDirectional;
								}


								// Kernel entry point: one thread group builds the light list of one 64x64 pixel
								// "big tile" for one eye.
								//   threadID  : index of the thread inside the group (0 .. NR_THREADS-1)
								//   u3GroupID : xy = big-tile coordinate, z = eye index
								//
								// Steps:
								//   1. Coarse list: lights whose screen-space AABB overlaps the tile are appended to lightsListLDS.
								//   2. Refinement: sphere and exact-edge tests overwrite rejected entries with UINT_MAX.
								//   3. Sort, then recount the entries that are still valid light indices.
								//   4. Pack (count, index0, index1, ...) as 16-bit pairs into g_vLightList.
								//   5. GENERATE_VOLUMETRIC_BIGTILE only: filter the list with LightAffectVolumetric()
								//      and pack the result the same way into g_vVolumetricLightList.
								[numthreads(NR_THREADS, 1, 1)]
								void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
								{
								    uint eyeIndex = u3GroupID.z;

								    uint2 tileIDX = u3GroupID.xy;
								    uint t=threadID;

								    uint iWidth = g_viDimensions.x;
								    uint iHeight = g_viDimensions.y;
								    uint nrBigTilesX = (iWidth+63)/64;
								    uint nrBigTilesY = (iHeight+63)/64;

								    if(t==0) lightOffs = 0;

								#if NR_THREADS > PLATFORM_LANE_COUNT
								    GroupMemoryBarrierWithGroupSync();
								#endif

								    // Raw pixel coordinates of tile
								    uint2 viTilLL = 64*tileIDX;
								    uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );            // not width and height minus 1 since viTilUR represents the end of the tile corner.

								    // 'Normalized' coordinates of tile, for use with AABB bounds in g_vBoundsBuffer
								    float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
								    float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

								    // build coarse list using AABB
								    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
								    {
								        const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex);
								        const float2 vMi = g_vBoundsBuffer[boundsIndices.min].xy;
								        const float2 vMa = g_vBoundsBuffer[boundsIndices.max].xy;

								        if( all(vMa>vTileLL) && all(vMi<vTileUR))
								        {
								            unsigned int uInc = 1;
								            unsigned int uIndex;
								            InterlockedAdd(lightOffs, uInc, uIndex);

								            // The counter keeps growing past the capacity; only in-range slots are stored.
								            if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;     // add to light list
								        }
								    }

								#if NR_THREADS > PLATFORM_LANE_COUNT || defined(SHADER_API_XBOXONE) || defined(SHADER_API_GAMECORE) || defined(SHADER_API_SWITCH) // not sure why XB1 and Switch need the barrier (it will not be correct without)
								    GroupMemoryBarrierWithGroupSync();
								#endif

								    // Clamp because lightOffs may have overshot the list capacity.
								    uint iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);

								#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
								    // Tested against the tile centre, clamped to the last valid pixel.
								    SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))), eyeIndex );
								#endif

								#ifdef EXACT_EDGE_TESTS
								    CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, eyeIndex);
								#endif

								    // sort lights
								    // Culled entries hold UINT_MAX, so an ascending sort moves them behind all valid indices.
								    SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE, t, NR_THREADS);

								    // Recount: only entries that are still valid light indices survive.
								    if(t==0) lightOffs = 0;
								    GroupMemoryBarrierWithGroupSync();
								    uint i;
								    for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<(uint)g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
								    GroupMemoryBarrierWithGroupSync();
								    iNrCoarseLights = lightOffs;

								    uint offs = tileIDX.y*nrBigTilesX + tileIDX.x + (eyeIndex * nrBigTilesX * nrBigTilesY);

								    // Word i holds entries 2*i (low 16 bits) and 2*i+1 (high 16 bits) of the sequence
								    // (count, light0, light1, ...). Thread t owns words t, t+NR_THREADS, ... so every
								    // word is produced by exactly one thread (the previous NR_THREADS / 2 stride made
								    // two threads compute and store the same word).
								    for(i = t; i * 2 < iNrCoarseLights + 1; i += NR_THREADS)
								    {
								        uint id = i * 2;

								        uint lightIndexOrCount0 = iNrCoarseLights; // Count
								        if (id > 0) // cannot use ternary operator here (it would evaluate both sides and fetch invalid indices, causing crash on some GPUs)
								        {
								            lightIndexOrCount0 = lightsListLDS[id - 1]; // Index0
								        }

								        uint lightIndex1 = 0;  // Index1
								        if (id < iNrCoarseLights) // cannot use ternary operator here (it would evaluate both sides and fetch invalid indices, causing crash on some GPUs)
								        {
								            lightIndex1 = lightsListLDS[id];
								        }

								        // Pack 2 light indices into a single bigtile value
								        g_vLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE * offs / 2 + i] = (lightIndexOrCount0 & 0xFFFF) | (lightIndex1 << 16);
								    }

								#if defined(GENERATE_VOLUMETRIC_BIGTILE)

								    // The compaction below rewrites lightsListLDS in place. Every thread must have
								    // finished reading the list in the packing loop above before any bucket is
								    // modified, otherwise a slower wave could pack already-compacted entries.
								    GroupMemoryBarrierWithGroupSync();

								    uint bucketSize = MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE / NR_THREADS;
								    uint bucketCount = MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE / bucketSize;
								    if (t < bucketCount)
								    {
								        // Pack light indices affecting volumetric fog in bucket of MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE / NR_THREADS (8 by default)
								        // Each thread only touches its own bucket, and the write cursor never passes
								        // the read cursor, so the in-place compaction is safe within a bucket.
								        uint localVolumetricLightCount = 0;
								        for (i = 0; i < bucketSize; i++)
								        {
								            uint id = t * bucketSize + i;
								            if (id >= iNrCoarseLights)
								                break;
								            uint lightIndex = lightsListLDS[id];
								            if (LightAffectVolumetric(lightIndex))
								            {
								                lightsListLDS[t * bucketSize + localVolumetricLightCount] = lightIndex;
								                localVolumetricLightCount++;
								            }
								        }

								        // Publish how many volumetric lights this bucket kept; thread 0 reads it below.
								        volumetricLightCounts[t] = localVolumetricLightCount;
								    }

								    // Kept outside of the thread-dependent branch above so that every thread of the
								    // group reaches the barrier.
								    GroupMemoryBarrierWithGroupSync();

								    if (t == 0)
								    {
								        // for each bucket, write the packed light indices back to the volumetric bigtile buffer
								        uint packedLightData = 0;
								        uint volumetricLightCounter = 0;
								        uint firstLightIndex = UINT_MAX; // sentinel: no volumetric light seen yet
								        uint bigTileOffset = MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE * offs / 2;
								        for (i = 0; i < bucketCount; i++)
								        {
								            // Buckets starting past the end of the list are empty.
								            if (i * bucketSize >= iNrCoarseLights)
								                break;

								            for (uint j = 0; j < volumetricLightCounts[i]; j++)
								            {
								                uint lightIndex = lightsListLDS[i * bucketSize + j];
								                volumetricLightCounter++;
								                if (firstLightIndex == UINT_MAX)
								                    firstLightIndex = lightIndex;

								                // Entry number volumetricLightCounter goes to the high half when odd, the low half when even.
								                packedLightData |= lightIndex << ((volumetricLightCounter & 1) * 16);

								                // An odd entry completes the current word.
								                if (volumetricLightCounter & 1)
								                {
								                    g_vVolumetricLightList[bigTileOffset + volumetricLightCounter / 2] = packedLightData;
								                    packedLightData = 0;
								                }
								            }
								        }

								        // In case a single light index remains in the packed data, we flush it to the bigtile buffer
								        if (volumetricLightCounter != 0 && !(volumetricLightCounter & 1))
								            g_vVolumetricLightList[bigTileOffset + volumetricLightCounter / 2] = packedLightData;

								        if (firstLightIndex == UINT_MAX)
								            firstLightIndex = 0;
								        // The first word is rewritten last: count in the low half, first light index in the high half.
								        g_vVolumetricLightList[bigTileOffset] = volumetricLightCounter | (firstLightIndex << 16);
								    }

								#endif
								}


								#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
								// Rejects coarse lights whose bounding sphere does not overlap the tile.
								//   threadID         : thread index inside the group; lights are strided by NR_THREADS
								//   iNrCoarseLights  : number of valid entries at the front of lightsListLDS
								//   screenCoordinate : screen position the tile is tested around
								//   eyeIndex         : eye used for the projection and for the light bound lookup
								// Rejected entries of lightsListLDS are overwritten with UINT_MAX.
								void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate, uint eyeIndex)
								{
								    // View-space point through the given screen coordinate at unit distance along the view direction.
								#if USE_LEFT_HAND_CAMERA_SPACE
								    const float unitDepth = 1.0;
								#else
								    const float unitDepth = -1.0;
								#endif
								    const float3 tileDir = GetViewPosFromLinDepth( screenCoordinate, unitDepth, eyeIndex);

								    // scale by half a tile
								    const float halfTileExtentAtUnitDepth = 32*GetOnePixDiagWorldDistAtDepthOne(eyeIndex);

								    for(int slot=threadID; slot<iNrCoarseLights; slot+=NR_THREADS)
								    {
								        const int boundIndex = GenerateLightCullDataIndex(lightsListLDS[slot], g_iNrVisibLights, eyeIndex);
								        SFiniteLightBound bound = g_data[boundIndex];

								        const bool overlapsTile = DoesSphereOverlapTile(tileDir, halfTileExtentAtUnitDepth, bound.center.xyz, bound.radius, g_isOrthographic!=0);
								        if( !overlapsTile )
								        {
								            lightsListLDS[slot]=UINT_MAX;
								        }
								    }

								#if NR_THREADS > PLATFORM_LANE_COUNT
								    GroupMemoryBarrierWithGroupSync();
								#endif
								}
								#endif


								#ifdef EXACT_EDGE_TESTS

								// Returns the view-space position of one of the 8 corners of the tile frustum.
								//   viTilLL, viTilUR : the two pixel-space corners spanning the tile
								//   i                : corner selector; bit 0 picks x (LL/UR), bit 1 picks y (LL/UR),
								//                      bit 2 picks the depth (g_fNearPlane / fTileFarPlane)
								//   fTileFarPlane    : depth used for the far corners
								//   eyeIndex         : eye used for the projection
								float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane, uint eyeIndex)
								{
								    const bool upperX = (i&1)!=0;
								    const bool upperY = (i&2)!=0;
								    const bool onFarPlane = (i&4)!=0;

								    const float2 corner = float2(upperX ? viTilUR.x : viTilLL.x, upperY ? viTilUR.y : viTilLL.y);

								    float depth = onFarPlane ? fTileFarPlane : g_fNearPlane;
								#if !USE_LEFT_HAND_CAMERA_SPACE
								    // Depth is negated when the camera space is not left-handed.
								    depth = -depth;
								#endif

								    return GetViewPosFromLinDepth( corner, depth, eyeIndex);
								}


								// Returns a point on, and the direction of, one edge of the tile frustum.
								//   vP0           : (out) view-space tile vertex the edge starts at
								//   vE0           : (out) edge direction
								//   e0            : edge selector; e0>>2 is the section, e0&3 the edge within the section
								//   viTilLL/UR    : pixel-space corners of the tile
								//   fTileFarPlane : depth used for far-plane vertices
								//   eyeIndex      : eye used for the projection
								void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex)
								{
								    // section 0 is side edges, section 1 is near edges and section 2 is far edges
								    const int section = e0>>2;
								    const int swizzle = e0&0x3;

								    // offset by 4 at section 2
								    const int vertexIndex = swizzle + (2*(section&0x2));

								    // Note that the y components of the two corners are swapped before the vertex lookup.
								    const uint2 cornerA = uint2(viTilLL.x, viTilUR.y);
								    const uint2 cornerB = uint2(viTilUR.x, viTilLL.y);
								    vP0 = GetTileVertex(cornerA, cornerB, vertexIndex, fTileFarPlane, eyeIndex);

								    if (section == 0)
								    {
								        // Side edges: under perspective they run along the vertex position itself,
								        // under orthographic projection they run along the view-space z axis.
								#if USE_LEFT_HAND_CAMERA_SPACE
								        const float3 viewAxis = float3(0.0,0.0,1.0);
								#else
								        const float3 viewAxis = float3(0.0,0.0,-1.0);
								#endif
								        vE0 = g_isOrthographic==0 ? vP0 : viewAxis;
								    }
								    else
								    {
								        // Near/far edges: axis aligned. swizzle 0 -> +X, 1 -> +Y, 2 -> -Y, 3 -> -X.
								        const float edgeSign = (swizzle & 0x2) == 0 ? 1.0f : (-1.0f);
								        const float3 edgeAxis = (int)(swizzle & 0x1) == (swizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0);
								        vE0 = edgeSign * edgeAxis;
								    }
								}


								// Rejects coarse lights that can be separated from the tile frustum by a plane
								// spanned by one edge of the light's bounding hull and one edge of the frustum.
								//   threadID        : thread index inside the group; edge pairs are strided by NR_THREADS
								//   iNrCoarseLights : number of entries at the front of lightsListLDS to examine
								//   viTilLL/UR      : pixel-space corners of the tile
								//   eyeIndex        : eye used for the projection and the light data lookup
								// Rejected entries of lightsListLDS are overwritten with UINT_MAX.
								// Every thread walks over all lights; the parallelism is over the edge pairs of one light.
								void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, uint eyeIndex)
								{
								    const bool bOnlyNeedFrustumSideEdges = true;
								    const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.

								    // 12 hull edges times the number of frustum edges (48 with the setting above).
								    const int totNrEdgePairs = 12*nrFrustEdges;
								    for(int l=0; l<iNrCoarseLights; l++)
								    {
								        // NOTE(review): other threads may overwrite this entry with UINT_MAX while it is read;
								        // no barrier is visible inside this loop. A thread that reads UINT_MAX fails the
								        // canEnter test below and skips the light -- confirm this is the intended behaviour.
								        const uint idxCoarse = lightsListLDS[l];
								        const int bufIdxCoarse = GenerateLightCullDataIndex(idxCoarse, g_iNrVisibLights, eyeIndex);

								        // Entries already culled (UINT_MAX) are not valid light indices and are skipped.
								        bool canEnter = idxCoarse<(uint) g_iNrVisibLights;

								        if(canEnter) canEnter = _LightVolumeData[bufIdxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE;       // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
								        UNITY_BRANCH if(canEnter)
								        {
								            SFiniteLightBound lgtDat = g_data[bufIdxCoarse];

								            const float3 boxX = lgtDat.boxAxisX.xyz;
								            const float3 boxY = lgtDat.boxAxisY.xyz;
								            const float3 boxZ = -lgtDat.boxAxisZ.xyz;   // flip axis (so it points away from the light direction for a spot-light)
								            const float3 center = lgtDat.center.xyz;
								            const float2 scaleXY = lgtDat.scaleXY;

								            for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
								            {
								                // Split the pair index: e0 selects the hull edge, e1 the frustum edge.
								                int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
								                int e1 = i - e0*nrFrustEdges;

								                // GetHullEdge / GetHullVertex are not defined in this file
								                // (presumably LightingConvexHullUtils.hlsl -- TODO confirm).
								                int idx_cur=0, idx_twin=0;
								                float3 vP0, vE0;
								                GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);

								                float3 vP1, vE1;
								                GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, g_fFarPlane, eyeIndex);

								                // potential separation plane
								                // Normal is perpendicular to both edges; the plane passes through vP0 on the hull.
								                float3 vN = cross(vE0, vE1);

								                // Classify the remaining hull vertices against the plane; idx_twin is forced to lie on it.
								                int positive=0, negative=0;
								                for(int k=1; k<8; k++)      // only need to test 7 verts (technically just 6).
								                {
								                    int j = (idx_cur+k)&0x7;
								                    float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
								                    float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
								                    if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
								                }
								                // resh: +1 / -1 when all hull vertices are on one side, 0 when they straddle the plane or all lie on it.
								                int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));

								                // Same classification for the 8 vertices of the tile frustum.
								                positive=0; negative=0;
								                for(int j=0; j<8; j++)
								                {
								                    float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane, eyeIndex);
								                    float fSignDist = dot(vN, vPf-vP0);
								                    if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
								                }
								                int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));

								                // Hull and frustum lie strictly on opposite sides: the plane separates them.
								                bool bFoundSepPlane = (resh*resf)<0;
								                if(bFoundSepPlane) lightsListLDS[l]=UINT_MAX;
								            }
								        }
								    }
								#if NR_THREADS > PLATFORM_LANE_COUNT
								    GroupMemoryBarrierWithGroupSync();
								#endif
								}

								#endif