You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

579 lines
30 KiB

using Unity.Burst;
using Unity.Collections;
using UnityEngine.Assertions;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
using System;
namespace UnityEngine.Rendering
{
internal struct GPUInstanceDataBufferBuilder : IDisposable
{
// Component descriptors accumulated by AddComponent. Allocated lazily on the first AddComponent call
// (Allocator.Temp) and released in Dispose.
private NativeList<GPUInstanceComponentDesc> m_Components;
/// <summary>
/// Packs a component's GPU address and its "overridden" flag into one metadata entry.
/// </summary>
/// <param name="nameID">Identifier stored in the entry's NameID (Build passes the component's property ID).</param>
/// <param name="gpuAddress">Address of the component inside the GPU buffer; stored in the low bits of Value.</param>
/// <param name="isOverridden">When true, the top bit of Value is set.</param>
/// <returns>The packed metadata entry.</returns>
private MetadataValue CreateMetadataValue(int nameID, int gpuAddress, bool isOverridden)
{
    // The most significant bit of Value is reserved for the overridden flag.
    const uint kIsOverriddenBit = 0x80000000;

    uint packedValue = (uint)gpuAddress;
    if (isOverridden)
        packedValue |= kIsOverriddenBit;

    return new MetadataValue
    {
        NameID = nameID,
        Value = packedValue,
    };
}
/// <summary>
/// Registers a component whose byte size is taken from the unmanaged type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Unmanaged type whose size defines the component's byte size.</typeparam>
/// <param name="propertyID">Property identifier of the component.</param>
/// <param name="isOverriden">Forwarded to the component descriptor.</param>
/// <param name="isPerInstance">Whether the component stores one value per instance.</param>
/// <param name="instanceType">Instance type the component belongs to.</param>
/// <param name="componentGroup">Component group forwarded to the descriptor.</param>
public void AddComponent<T>(int propertyID, bool isOverriden, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup = InstanceComponentGroup.Default) where T : unmanaged
{
    int componentByteSize = UnsafeUtility.SizeOf<T>();
    AddComponent(propertyID, isOverriden, componentByteSize, isPerInstance, instanceType, componentGroup);
}
/// <summary>
/// Appends a component descriptor to the builder, allocating the descriptor list on first use.
/// </summary>
/// <param name="propertyID">Property identifier of the component.</param>
/// <param name="isOverriden">Forwarded to the component descriptor.</param>
/// <param name="byteSize">Size of one component value in bytes.</param>
/// <param name="isPerInstance">Whether the component stores one value per instance.</param>
/// <param name="instanceType">Instance type the component belongs to; must not be lower than the previously added one.</param>
/// <param name="componentGroup">Component group forwarded to the descriptor.</param>
public void AddComponent(int propertyID, bool isOverriden, int byteSize, bool isPerInstance, InstanceType instanceType, InstanceComponentGroup componentGroup)
{
    if (!m_Components.IsCreated)
        m_Components = new NativeList<GPUInstanceComponentDesc>(64, Allocator.Temp);

    // Components have to arrive in non-decreasing InstanceType order.
    int existingCount = m_Components.Length;
    if (existingCount > 0)
    {
        var previousType = m_Components[existingCount - 1].instanceType;
        Assert.IsTrue(previousType <= instanceType, "Added components must be sorted by InstanceType for better memory layout.");
    }

    var descriptor = new GPUInstanceComponentDesc(propertyID, byteSize, isOverriden, isPerInstance, instanceType, componentGroup);
    m_Components.Add(descriptor);
}
/// <summary>
/// Lays out every registered component in a single raw GPU buffer and creates the CPU-side and
/// GPU-side tables that describe that layout.
/// </summary>
/// <param name="instanceNumInfo">Number of instances to reserve space for, per <see cref="InstanceType"/>.</param>
/// <returns>
/// A new <see cref="GPUInstanceDataBuffer"/> holding the Persistent native arrays and the
/// GraphicsBuffers allocated here.
/// </returns>
public unsafe GPUInstanceDataBuffer Build(in InstanceNumInfo instanceNumInfo)
{
    int componentCount = m_Components.Length;

    // Scratch tables that are uploaded to the GPU at the end and then released.
    int perInstanceComponentCounts = 0;
    var perInstanceComponentIndices = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentAddresses = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentByteSizes = new NativeArray<int>(componentCount, Allocator.Temp);
    var componentInstanceIndexRanges = new NativeArray<Vector2Int>(componentCount, Allocator.Temp);

    GPUInstanceDataBuffer newBuffer = new GPUInstanceDataBuffer();
    newBuffer.instanceNumInfo = instanceNumInfo;
    newBuffer.instancesNumPrefixSum = new NativeArray<int>((int)InstanceType.Count, Allocator.Persistent);
    newBuffer.instancesSpan = new NativeArray<int>((int)InstanceType.Count, Allocator.Persistent);

    // Exclusive prefix sum of the per-type instance counts, plus the span (count including children) per type.
    int sum = 0;
    for (int i = 0; i < (int)InstanceType.Count; ++i)
    {
        newBuffer.instancesNumPrefixSum[i] = sum;
        sum += instanceNumInfo.InstanceNums[i];
        newBuffer.instancesSpan[i] = instanceNumInfo.GetInstanceNumIncludingChildren((InstanceType)i);
    }

    newBuffer.layoutVersion = GPUInstanceDataBuffer.NextVersion();
    newBuffer.version = 0;
    newBuffer.defaultMetadata = new NativeArray<MetadataValue>(componentCount, Allocator.Persistent);
    newBuffer.descriptions = new NativeArray<GPUInstanceComponentDesc>(componentCount, Allocator.Persistent);
    newBuffer.nameToMetadataMap = new NativeParallelHashMap<int, int>(componentCount, Allocator.Persistent);
    newBuffer.gpuBufferComponentAddress = new NativeArray<int>(componentCount, Allocator.Persistent);

    // The first four Vector4 of the buffer are reserved and must be 0, 0, 0, 0 (they are zero-filled below).
    int vec4Size = UnsafeUtility.SizeOf<Vector4>();
    int byteOffset = 4 * vec4Size;

    for (int c = 0; c < componentCount; ++c)
    {
        var componentDesc = m_Components[c];
        newBuffer.descriptions[c] = componentDesc;

        int instancesBegin = newBuffer.instancesNumPrefixSum[(int)componentDesc.instanceType];
        int instancesEnd = instancesBegin + newBuffer.instancesSpan[(int)componentDesc.instanceType];
        // Components that are not per-instance store a single value.
        int instancesNum = componentDesc.isPerInstance ? instancesEnd - instancesBegin : 1;
        Assert.IsTrue(instancesNum >= 0);

        componentInstanceIndexRanges[c] = new Vector2Int(instancesBegin, instancesBegin + instancesNum);

        // The address is biased by the first instance index of the component's type
        // (presumably so it can be indexed directly with a global instance index - confirm against the kernels).
        int componentGPUAddress = byteOffset - instancesBegin * componentDesc.byteSize;
        Assert.IsTrue(componentGPUAddress >= 0, "GPUInstanceDataBufferBuilder: GPU address is negative. This is not supported for now. See kIsOverriddenBit." +
            "In general, if there is only one root InstanceType (MeshRenderer in our case) with a component that is larger or equal in size than any component in a derived InstanceType." +
            "And the number of parent gpu instances are always larger or equal to the number of derived type gpu instances. Than GPU address cannot become negative.");

        newBuffer.gpuBufferComponentAddress[c] = componentGPUAddress;
        newBuffer.defaultMetadata[c] = CreateMetadataValue(componentDesc.propertyID, componentGPUAddress, componentDesc.isOverriden);
        componentAddresses[c] = componentGPUAddress;
        componentByteSizes[c] = componentDesc.byteSize;

        int componentByteSize = componentDesc.byteSize * instancesNum;
        byteOffset += componentByteSize;

        bool addedToMap = newBuffer.nameToMetadataMap.TryAdd(componentDesc.propertyID, c);
        Assert.IsTrue(addedToMap, "Repetitive metadata element added to object.");

        if (componentDesc.isPerInstance)
        {
            perInstanceComponentIndices[perInstanceComponentCounts] = c;
            perInstanceComponentCounts++;
        }
    }

    newBuffer.byteSize = byteOffset;
    newBuffer.gpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, newBuffer.byteSize / 4, 4);

    // Zero-fill the reserved header; the temporary array is cleared on allocation.
    var zeroHeader = new NativeArray<Vector4>(4, Allocator.Temp);
    newBuffer.gpuBuffer.SetData(zeroHeader, 0, 0, 4);
    zeroHeader.Dispose();

    newBuffer.validComponentsIndicesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, perInstanceComponentCounts, 4);
    newBuffer.validComponentsIndicesGpuBuffer.SetData(perInstanceComponentIndices, 0, 0, perInstanceComponentCounts);
    newBuffer.componentAddressesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 4);
    newBuffer.componentAddressesGpuBuffer.SetData(componentAddresses, 0, 0, componentCount);
    newBuffer.componentInstanceIndexRangesGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 8);
    newBuffer.componentInstanceIndexRangesGpuBuffer.SetData(componentInstanceIndexRanges, 0, 0, componentCount);
    newBuffer.componentByteCountsGpuBuffer = new GraphicsBuffer(GraphicsBuffer.Target.Raw, componentCount, 4);
    newBuffer.componentByteCountsGpuBuffer.SetData(componentByteSizes, 0, 0, componentCount);
    newBuffer.perInstanceComponentCount = perInstanceComponentCounts;

    // Release every scratch table, including the index-range table that was previously left undisposed.
    perInstanceComponentIndices.Dispose();
    componentAddresses.Dispose();
    componentByteSizes.Dispose();
    componentInstanceIndexRanges.Dispose();

    return newBuffer;
}
/// <summary>
/// Releases the component descriptor list if it was ever allocated.
/// </summary>
public void Dispose()
{
    if (!m_Components.IsCreated)
        return;

    m_Components.Dispose();
}
}
internal struct GPUInstanceDataBufferUploader : IDisposable
{
// Shader property IDs for the integers and buffers that SubmitToGpu binds on the upload compute kernel.
// Each ID is resolved once from the property name given in the string literal.
private static class UploadKernelIDs
{
// Integer parameters.
public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
public static readonly int _InputInstanceCounts = Shader.PropertyToID("_InputInstanceCounts");
public static readonly int _InputInstanceByteSize = Shader.PropertyToID("_InputInstanceByteSize");
// Buffers owned by the uploader's GPUResources.
public static readonly int _InputComponentOffsets = Shader.PropertyToID("_InputComponentOffsets");
public static readonly int _InputInstanceData = Shader.PropertyToID("_InputInstanceData");
public static readonly int _InputInstanceIndices = Shader.PropertyToID("_InputInstanceIndices");
public static readonly int _InputValidComponentIndices = Shader.PropertyToID("_InputValidComponentIndices");
// Layout tables and the destination buffer, taken from the target GPUInstanceDataBuffer.
public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
public static readonly int _InputComponentByteCounts = Shader.PropertyToID("_InputComponentByteCounts");
public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");
public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
}
/// <summary>
/// Reusable GPU-side scratch buffers and the compute kernel used by the uploader.
/// Buffers only ever grow; they are reallocated when a larger size is requested.
/// </summary>
public struct GPUResources : IDisposable
{
    // Raw buffer receiving the packed per-instance data (one 4-byte element per uint).
    public ComputeBuffer instanceData;
    // Raw buffer receiving one 4-byte GPU instance index per uploaded instance.
    public ComputeBuffer instanceIndices;
    // Raw buffer receiving one 4-byte offset per component.
    public ComputeBuffer inputComponentOffsets;
    // Raw buffer receiving the indices of the components to upload.
    public ComputeBuffer validComponentIndices;
    // Upload compute shader and the kernel index resolved by LoadShaders.
    public ComputeShader cs;
    public int kernelId;

    // Sizes that were last requested for each buffer; used to decide when a buffer has to grow.
    private int m_InstanceDataByteSize;
    private int m_InstanceCount;
    private int m_ComponentCounts;
    private int m_ValidComponentIndicesCount;

    /// <summary>
    /// Resolves the upload compute shader and its kernel the first time it is called.
    /// </summary>
    /// <param name="resources">Resource container providing the upload kernels.</param>
    public void LoadShaders(GPUResidentDrawerResources resources)
    {
        if (cs == null)
        {
            cs = resources.instanceDataBufferUploadKernels;
            kernelId = cs.FindKernel("MainUploadScatterInstances");
        }
    }

    /// <summary>
    /// Makes sure every scratch buffer exists and is at least as large as requested.
    /// </summary>
    /// <param name="newInstanceCount">Number of instances to upload.</param>
    /// <param name="sizePerInstance">Size in bytes of the packed data of one instance.</param>
    /// <param name="newComponentCounts">Number of component offsets to upload.</param>
    /// <param name="validComponentIndicesCount">Number of valid component indices to upload.</param>
    public void CreateResources(int newInstanceCount, int sizePerInstance, int newComponentCounts, int validComponentIndicesCount)
    {
        int newInstanceDataByteSize = newInstanceCount * sizePerInstance;

        EnsureBuffer(ref instanceData, ref m_InstanceDataByteSize, newInstanceDataByteSize, (newInstanceDataByteSize + 3) / 4);
        EnsureBuffer(ref instanceIndices, ref m_InstanceCount, newInstanceCount, newInstanceCount);
        EnsureBuffer(ref inputComponentOffsets, ref m_ComponentCounts, newComponentCounts, newComponentCounts);
        EnsureBuffer(ref validComponentIndices, ref m_ValidComponentIndicesCount, validComponentIndicesCount, validComponentIndicesCount);
    }

    /// <summary>
    /// Releases all scratch buffers and forgets the shader. The struct can be reused afterwards:
    /// the next CreateResources call allocates fresh buffers.
    /// </summary>
    public void Dispose()
    {
        cs = null;
        ReleaseBuffer(ref instanceData, ref m_InstanceDataByteSize);
        ReleaseBuffer(ref instanceIndices, ref m_InstanceCount);
        ReleaseBuffer(ref inputComponentOffsets, ref m_ComponentCounts);
        ReleaseBuffer(ref validComponentIndices, ref m_ValidComponentIndicesCount);
    }

    // (Re)allocates a raw 4-byte-stride buffer when it is missing or smaller than the requested size.
    private static void EnsureBuffer(ref ComputeBuffer buffer, ref int trackedSize, int requestedSize, int elementCount)
    {
        if (buffer != null && requestedSize <= trackedSize)
            return;

        if (buffer != null)
            buffer.Release();

        // A zero-length ComputeBuffer cannot be created, so always allocate at least one element.
        int safeElementCount = elementCount > 0 ? elementCount : 1;
        buffer = new ComputeBuffer(safeElementCount, 4, ComputeBufferType.Raw);
        trackedSize = requestedSize;
    }

    // Releases a buffer and clears its reference and tracked size so it is never reused after release.
    private static void ReleaseBuffer(ref ComputeBuffer buffer, ref int trackedSize)
    {
        if (buffer != null)
            buffer.Release();

        buffer = null;
        trackedSize = 0;
    }
}
// Number of uints of per-instance data stored for one instance of the uploader's instance type.
int m_UintPerInstance;
// Maximum number of instances the staging buffer was sized for.
int m_Capacity;
// Number of instances in the current upload; set by AllocateUploadHandles and reset to 0 by SubmitToGpu.
int m_InstanceCount;
// Per component: copy of the descriptor's isPerInstance flag.
NativeArray<bool> m_ComponentIsInstanced;
// Per component: uint offset inside one instance's record, or -1 for components of another instance type.
NativeArray<int> m_ComponentDataIndex;
// Per component: size in uints, or 0 for components of another instance type.
NativeArray<int> m_DescriptionsUintSize;
// CPU staging buffer of m_Capacity * m_UintPerInstance uints that is copied to the GPU in SubmitToGpu.
NativeArray<uint> m_TmpDataBuffer;
// Indices of the components written since the last SubmitToGpu.
NativeList<int> m_WritenComponentIndices;
// Zero-length array passed as "no gather indices" by the two-argument WriteInstanceDataJob overload.
private NativeArray<int> m_DummyArray;
/// <summary>
/// Creates an uploader for the components of one instance type and allocates its staging storage.
/// </summary>
/// <param name="descriptions">Descriptors of all components of the target buffer.</param>
/// <param name="capacity">Maximum number of instances that can be uploaded in one submit.</param>
/// <param name="instanceType">Instance type whose components get a slot in the staging record.</param>
public GPUInstanceDataBufferUploader(in NativeArray<GPUInstanceComponentDesc> descriptions, int capacity, InstanceType instanceType)
{
    int componentCount = descriptions.Length;

    m_Capacity = capacity;
    m_InstanceCount = 0;
    m_ComponentDataIndex = new NativeArray<int>(componentCount, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_ComponentIsInstanced = new NativeArray<bool>(componentCount, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_DescriptionsUintSize = new NativeArray<int>(componentCount, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    m_WritenComponentIndices = new NativeList<int>(componentCount, Allocator.TempJob);
    m_DummyArray = new NativeArray<int>(0, Allocator.Persistent);

    int bytesPerUint = UnsafeUtility.SizeOf<uint>();
    int uintsPerInstance = 0;

    for (int componentIndex = 0; componentIndex < componentCount; ++componentIndex)
    {
        var desc = descriptions[componentIndex];
        m_ComponentIsInstanced[componentIndex] = desc.isPerInstance;

        // Components of other instance types get no slot in the staging record.
        if (desc.instanceType != instanceType)
        {
            m_ComponentDataIndex[componentIndex] = -1;
            m_DescriptionsUintSize[componentIndex] = 0;
            continue;
        }

        int componentUintSize = desc.byteSize / bytesPerUint;
        m_ComponentDataIndex[componentIndex] = uintsPerInstance;
        m_DescriptionsUintSize[componentIndex] = componentUintSize;

        // Only per-instance components take up space in the record.
        if (desc.isPerInstance)
            uintsPerInstance += componentUintSize;
    }

    m_UintPerInstance = uintsPerInstance;
    m_TmpDataBuffer = new NativeArray<uint>(m_Capacity * m_UintPerInstance, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
}
/// <summary>
/// Returns the raw address of the CPU staging buffer.
/// </summary>
/// <returns>Pointer to the first uint of the staging buffer.</returns>
public unsafe IntPtr GetUploadBufferPtr()
{
    Assert.IsTrue(m_TmpDataBuffer.IsCreated);
    // The buffer must be non-empty and an upload must have been started.
    Assert.IsTrue(m_TmpDataBuffer.Length > 0 && m_InstanceCount > 0);

    void* stagingAddress = m_TmpDataBuffer.GetUnsafePtr();
    return new IntPtr(stagingAddress);
}
/// <summary>
/// Gets the number of uints stored per instance in the staging buffer.
/// </summary>
/// <returns>The per-instance record size, in uints.</returns>
public int GetUIntPerInstance() => m_UintPerInstance;
/// <summary>
/// Returns the uint offset of a per-instance component inside one instance's staging record.
/// </summary>
/// <param name="parameterIndex">Index of the component in the descriptor array given to the constructor.</param>
/// <returns>The offset, in uints, from the start of an instance's record.</returns>
public int GetParamUIntOffset(int parameterIndex)
{
    // Validate the index first so an invalid one is reported by the assertion below instead of
    // failing inside the array accesses that follow.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
    Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");
    Assert.IsTrue(m_ComponentDataIndex[parameterIndex] != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");
    return m_ComponentDataIndex[parameterIndex];
}
/// <summary>
/// Marks a component as written for the next submit and returns its uint offset inside an instance's record.
/// </summary>
/// <typeparam name="T">Type that will be written; its size must match the component's size.</typeparam>
/// <param name="parameterIndex">Index of the component in the descriptor array given to the constructor.</param>
/// <returns>The offset, in uints, from the start of an instance's record.</returns>
public int PrepareParamWrite<T>(int parameterIndex) where T : unmanaged
{
    // Check the index before it is used to read the per-component tables.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_DescriptionsUintSize.Length, "Parameter index invalid.");

    int uintPerParameter = UnsafeUtility.SizeOf<T>() / UnsafeUtility.SizeOf<uint>();
    Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");

    // Remember each written component only once.
    if (!m_WritenComponentIndices.Contains(parameterIndex))
        m_WritenComponentIndices.Add(parameterIndex);

    return GetParamUIntOffset(parameterIndex);
}
/// <summary>
/// Starts an upload of <paramref name="handlesLength"/> instances.
/// </summary>
/// <param name="handlesLength">Number of instances that will be written and submitted; must not exceed the capacity.</param>
public unsafe void AllocateUploadHandles(int handlesLength)
{
    // Instances are no longer preallocated here: they are handed to SubmitToGPU as parameters to avoid duplicating data.
    // Recording the count is enough to a) verify it fits the capacity and b) make write/gatherInstanceData copy the right amount.
    bool fitsInCapacity = m_Capacity >= handlesLength;
    Assert.IsTrue(fitsInCapacity);

    m_InstanceCount = handlesLength;
}
/// <summary>
/// Schedules a job that copies one value per instance into the staging buffer, reading the source in order.
/// </summary>
/// <typeparam name="T">Element type of the source data.</typeparam>
/// <param name="parameterIndex">Index of the component being written.</param>
/// <param name="instanceData">Source values, one per uploaded instance.</param>
/// <returns>Handle of the scheduled job.</returns>
public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData) where T : unmanaged
    // The zero-length dummy array tells the full overload not to gather.
    => WriteInstanceDataJob(parameterIndex, instanceData, m_DummyArray);
/// <summary>
/// Schedules a parallel job that copies one component value per instance into the staging buffer.
/// </summary>
/// <typeparam name="T">Element type of the source data; its size must match the component's size.</typeparam>
/// <param name="parameterIndex">Index of the component being written.</param>
/// <param name="instanceData">Source values.</param>
/// <param name="gatherIndices">
/// When non-empty, instance i reads <c>instanceData[gatherIndices[i]]</c>; when empty, instance i reads <c>instanceData[i]</c>.
/// </param>
/// <returns>Handle of the scheduled job, or a default handle when no upload is in progress.</returns>
public unsafe JobHandle WriteInstanceDataJob<T>(int parameterIndex, NativeArray<T> instanceData, NativeArray<int> gatherIndices) where T : unmanaged
{
    if (m_InstanceCount == 0)
        return default;

    var gatherData = gatherIndices.Length != 0;
    Assert.IsTrue(gatherData || instanceData.Length == m_InstanceCount);
    Assert.IsTrue(!gatherData || gatherIndices.Length == m_InstanceCount);
    Assert.IsTrue(UnsafeUtility.SizeOf<T>() >= UnsafeUtility.SizeOf<uint>());

    int uintPerParameter = UnsafeUtility.SizeOf<T>() / UnsafeUtility.SizeOf<uint>();

    // Validate the index before it is used to read the per-component tables.
    Assert.IsTrue(parameterIndex >= 0 && parameterIndex < m_ComponentDataIndex.Length, "Parameter index invalid.");
    Assert.IsTrue(m_ComponentIsInstanced[parameterIndex], "Component is non instanced. Can only call this function on parameters that are for all instances.");
    Assert.IsTrue(uintPerParameter == m_DescriptionsUintSize[parameterIndex], "Parameter to write is incompatible, must be same stride as destination.");
    Assert.IsTrue(m_ComponentDataIndex[parameterIndex] != -1, "Parameter index is not allocated. Did you allocate proper InstanceType parameters?");

    // Remember each written component only once.
    if (!m_WritenComponentIndices.Contains(parameterIndex))
        m_WritenComponentIndices.Add(parameterIndex);

    var writeJob = new WriteInstanceDataParameterJob
    {
        gatherData = gatherData,
        gatherIndices = gatherIndices,
        parameterIndex = parameterIndex,
        uintPerParameter = uintPerParameter,
        uintPerInstance = m_UintPerInstance,
        componentDataIndex = m_ComponentDataIndex,
        // View the source as a flat array of uints.
        instanceData = instanceData.Reinterpret<uint>(UnsafeUtility.SizeOf<T>()),
        tmpDataBuffer = m_TmpDataBuffer
    };

    return writeJob.Schedule(m_InstanceCount, WriteInstanceDataParameterJob.k_BatchSize);
}
/// <summary>
/// Copies the staged instance data to the GPU and dispatches the upload kernel that scatters it
/// into <paramref name="instanceDataBuffer"/>. Does nothing when no upload is in progress.
/// </summary>
/// <param name="instanceDataBuffer">Destination buffer; its version is incremented.</param>
/// <param name="gpuInstanceIndices">Destination GPU instance index of each staged instance.</param>
/// <param name="gpuResources">Scratch buffers and compute kernel used for the upload.</param>
/// <param name="submitOnlyWrittenParams">
/// When true, only the components written since the last submit are uploaded; otherwise the
/// destination buffer's own list of per-instance components is used.
/// </param>
public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<GPUInstanceIndex> gpuInstanceIndices, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
{
    int instanceCount = m_InstanceCount;
    if (instanceCount == 0)
        return;

    Assert.IsTrue(gpuInstanceIndices.Length == instanceCount);

    instanceDataBuffer.version++;

    int instanceByteSize = m_UintPerInstance * UnsafeUtility.SizeOf<uint>();
    int componentCount = m_ComponentDataIndex.Length;
    int writtenComponentCount = m_WritenComponentIndices.Length;

    // Stage the CPU data in the scratch buffers.
    gpuResources.CreateResources(instanceCount, instanceByteSize, componentCount, writtenComponentCount);
    gpuResources.instanceData.SetData(m_TmpDataBuffer, 0, 0, instanceCount * m_UintPerInstance);
    gpuResources.instanceIndices.SetData(gpuInstanceIndices, 0, 0, instanceCount);
    gpuResources.inputComponentOffsets.SetData(m_ComponentDataIndex, 0, 0, componentCount);

    ComputeShader uploadShader = gpuResources.cs;
    int kernel = gpuResources.kernelId;

    uploadShader.SetInt(UploadKernelIDs._InputInstanceCounts, instanceCount);
    uploadShader.SetInt(UploadKernelIDs._InputInstanceByteSize, instanceByteSize);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputInstanceData, gpuResources.instanceData);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputInstanceIndices, gpuResources.instanceIndices);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentOffsets, gpuResources.inputComponentOffsets);

    // Choose which components the kernel uploads.
    if (!submitOnlyWrittenParams)
    {
        uploadShader.SetInt(UploadKernelIDs._InputValidComponentCounts, instanceDataBuffer.perInstanceComponentCount);
        uploadShader.SetBuffer(kernel, UploadKernelIDs._InputValidComponentIndices, instanceDataBuffer.validComponentsIndicesGpuBuffer);
    }
    else
    {
        gpuResources.validComponentIndices.SetData(m_WritenComponentIndices.AsArray(), 0, 0, writtenComponentCount);
        uploadShader.SetInt(UploadKernelIDs._InputValidComponentCounts, writtenComponentCount);
        uploadShader.SetBuffer(kernel, UploadKernelIDs._InputValidComponentIndices, gpuResources.validComponentIndices);
    }

    // Bind the destination layout tables and the destination buffer.
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentAddresses, instanceDataBuffer.componentAddressesGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentByteCounts, instanceDataBuffer.componentByteCountsGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._InputComponentInstanceIndexRanges, instanceDataBuffer.componentInstanceIndexRangesGpuBuffer);
    uploadShader.SetBuffer(kernel, UploadKernelIDs._OutputBuffer, instanceDataBuffer.gpuBuffer);

    // One thread group is dispatched per 64 instances, rounded up.
    int threadGroupCount = (instanceCount + 63) / 64;
    uploadShader.Dispatch(kernel, threadGroupCount, 1, 1);

    // The upload is finished; reset the per-upload state.
    m_InstanceCount = 0;
    m_WritenComponentIndices.Clear();
}
/// <summary>
/// Converts CPU instance handles to GPU instance indices and submits the staged data for them.
/// Does nothing when no upload is in progress.
/// </summary>
/// <param name="instanceDataBuffer">Destination buffer; also performs the handle-to-index conversion.</param>
/// <param name="instances">CPU handles of the staged instances.</param>
/// <param name="gpuResources">Scratch buffers and compute kernel used for the upload.</param>
/// <param name="submitOnlyWrittenParams">Forwarded to the index-based overload.</param>
public void SubmitToGpu(GPUInstanceDataBuffer instanceDataBuffer, NativeArray<InstanceHandle> instances, ref GPUResources gpuResources, bool submitOnlyWrittenParams)
{
    if (m_InstanceCount == 0)
        return;

    var gpuInstanceIndices = new NativeArray<GPUInstanceIndex>(instances.Length, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
    try
    {
        instanceDataBuffer.CPUInstanceArrayToGPUInstanceArray(instances, gpuInstanceIndices);
        SubmitToGpu(instanceDataBuffer, gpuInstanceIndices, ref gpuResources, submitOnlyWrittenParams);
    }
    finally
    {
        // Release the temporary index array even when the conversion or the submit throws.
        gpuInstanceIndices.Dispose();
    }
}
/// <summary>
/// Releases every native container owned by the uploader that is still allocated.
/// </summary>
public void Dispose()
{
    // Per-component lookup tables.
    if (m_ComponentDataIndex.IsCreated)
    {
        m_ComponentDataIndex.Dispose();
    }

    if (m_ComponentIsInstanced.IsCreated)
    {
        m_ComponentIsInstanced.Dispose();
    }

    if (m_DescriptionsUintSize.IsCreated)
    {
        m_DescriptionsUintSize.Dispose();
    }

    // Staging storage and per-upload bookkeeping.
    if (m_TmpDataBuffer.IsCreated)
    {
        m_TmpDataBuffer.Dispose();
    }

    if (m_WritenComponentIndices.IsCreated)
    {
        m_WritenComponentIndices.Dispose();
    }

    if (m_DummyArray.IsCreated)
    {
        m_DummyArray.Dispose();
    }
}
/// <summary>
/// Parallel job that copies one component value per instance from a source array into the
/// interleaved staging buffer.
/// </summary>
[BurstCompile(DisableSafetyChecks = true, OptimizeFor = OptimizeFor.Performance)]
internal struct WriteInstanceDataParameterJob : IJobParallelFor
{
    public const int k_BatchSize = 512;

    // True when the source element is looked up through gatherIndices.
    [ReadOnly] public bool gatherData;
    // Index of the component being written.
    [ReadOnly] public int parameterIndex;
    // Size of one component value, in uints.
    [ReadOnly] public int uintPerParameter;
    // Size of one instance's staging record, in uints.
    [ReadOnly] public int uintPerInstance;
    // Per component: uint offset inside an instance's record.
    [ReadOnly] public NativeArray<int> componentDataIndex;
    // Source element index per instance; only read when gatherData is true.
    [ReadOnly] public NativeArray<int> gatherIndices;
    // Source values viewed as uints.
    [NativeDisableContainerSafetyRestriction, NoAlias][ReadOnly] public NativeArray<uint> instanceData;
    // Destination staging buffer.
    [NativeDisableContainerSafetyRestriction, NoAlias][WriteOnly] public NativeArray<uint> tmpDataBuffer;

    /// <summary>
    /// Copies the component value of instance <paramref name="index"/> into its staging record.
    /// </summary>
    /// <param name="index">Index of the instance inside the current upload.</param>
    public unsafe void Execute(int index)
    {
        int recordStart = index * uintPerInstance;
        Assert.IsTrue(recordStart < tmpDataBuffer.Length, "Trying to write to an instance buffer out of bounds.");

        // Pick the source element, either directly or through the gather table.
        int sourceElement = gatherData ? gatherIndices[index] : index;
        int dataOffset = sourceElement * uintPerParameter;
        Assert.IsTrue(dataOffset < instanceData.Length);

        int bytesPerUint = UnsafeUtility.SizeOf<uint>();
        uint* source = (uint*)instanceData.GetUnsafePtr() + dataOffset;
        uint* destination = (uint*)tmpDataBuffer.GetUnsafePtr() + recordStart + componentDataIndex[parameterIndex];

        UnsafeUtility.MemCpy(destination, source, uintPerParameter * bytesPerUint);
    }
}
}
internal struct GPUInstanceDataBufferGrower : IDisposable
{
// Shader property IDs for the integers and buffers that SubmitToGpu binds on the copy compute kernel.
// Each ID is resolved once from the property name given in the string literal.
private static class CopyInstancesKernelIDs
{
// Integer parameters.
public static readonly int _InputValidComponentCounts = Shader.PropertyToID("_InputValidComponentCounts");
public static readonly int _InstanceCounts = Shader.PropertyToID("_InstanceCounts");
public static readonly int _InstanceOffset = Shader.PropertyToID("_InstanceOffset");
public static readonly int _OutputInstanceOffset = Shader.PropertyToID("_OutputInstanceOffset");
// Layout tables of the source and destination buffers.
public static readonly int _ValidComponentIndices = Shader.PropertyToID("_ValidComponentIndices");
public static readonly int _ComponentByteCounts = Shader.PropertyToID("_ComponentByteCounts");
public static readonly int _InputComponentAddresses = Shader.PropertyToID("_InputComponentAddresses");
public static readonly int _OutputComponentAddresses = Shader.PropertyToID("_OutputComponentAddresses");
public static readonly int _InputComponentInstanceIndexRanges = Shader.PropertyToID("_InputComponentInstanceIndexRanges");
// Source and destination data buffers.
public static readonly int _InputBuffer = Shader.PropertyToID("_InputBuffer");
public static readonly int _OutputBuffer = Shader.PropertyToID("_OutputBuffer");
}
/// <summary>
/// Compute shader and kernel used by the grower to copy instances between buffers.
/// </summary>
public struct GPUResources : IDisposable
{
    public ComputeShader cs;
    public int kernelId;

    /// <summary>
    /// Resolves the copy compute shader and its kernel the first time it is called.
    /// </summary>
    /// <param name="resources">Resource container providing the copy kernels.</param>
    public void LoadShaders(GPUResidentDrawerResources resources)
    {
        // Already loaded: nothing to do.
        if (cs != null)
            return;

        cs = resources.instanceDataBufferCopyKernels;
        kernelId = cs.FindKernel("MainCopyInstances");
    }

    /// <summary>
    /// The grower needs no scratch buffers; intentionally empty.
    /// </summary>
    public void CreateResources() { }

    /// <summary>
    /// Drops the reference to the compute shader.
    /// </summary>
    public void Dispose() => cs = null;
}
// Buffer the instances are copied from; SubmitToGpu returns it unchanged when no growth is needed.
private GPUInstanceDataBuffer m_SrcBuffer;
// Larger replacement buffer built by the constructor, or null when no instance type grew.
private GPUInstanceDataBuffer m_DstBuffer;
//@ TODO: a buffer shrinker should be implemented as well; otherwise a large number of instances can stay allocated
//@ (for trees, for example) even though no scene currently in use contains any.
/// <summary>
/// Prepares growing <paramref name="sourceBuffer"/> to the requested instance counts. When at least one
/// instance type needs more instances, a new buffer with the same components is built; otherwise
/// no destination buffer is created.
/// </summary>
/// <param name="sourceBuffer">Buffer to grow.</param>
/// <param name="instanceNumInfo">Requested instance counts; no count may be lower than the source's.</param>
public unsafe GPUInstanceDataBufferGrower(GPUInstanceDataBuffer sourceBuffer, in InstanceNumInfo instanceNumInfo)
{
    m_SrcBuffer = sourceBuffer;
    m_DstBuffer = null;

    // Every type is checked (no early exit) so the shrink assertion runs for all of them.
    bool anyTypeGrew = false;
    for (int typeIndex = 0; typeIndex < (int)InstanceType.Count; ++typeIndex)
    {
        int requestedNum = instanceNumInfo.InstanceNums[typeIndex];
        int currentNum = sourceBuffer.instanceNumInfo.InstanceNums[typeIndex];

        Assert.IsTrue(requestedNum >= currentNum, "Shrinking GPU instance buffer is not supported yet.");
        anyTypeGrew |= requestedNum > currentNum;
    }

    if (!anyTypeGrew)
        return;

    // Rebuild a buffer with the same component layout but the new instance counts.
    var builder = new GPUInstanceDataBufferBuilder();
    var sourceDescriptions = sourceBuffer.descriptions;
    for (int d = 0; d < sourceDescriptions.Length; ++d)
    {
        GPUInstanceComponentDesc descriptor = sourceDescriptions[d];
        builder.AddComponent(descriptor.propertyID, descriptor.isOverriden, descriptor.byteSize, descriptor.isPerInstance, descriptor.instanceType, descriptor.componentGroup);
    }

    m_DstBuffer = builder.Build(instanceNumInfo);
    builder.Dispose();
}
/// <summary>
/// Copies the instances of the source buffer into the grown buffer on the GPU.
/// </summary>
/// <param name="gpuResources">Compute shader and kernel used for the copy.</param>
/// <returns>
/// The source buffer when no growth was needed; otherwise the new buffer (returned without any
/// copy when the source holds no instances).
/// </returns>
public GPUInstanceDataBuffer SubmitToGpu(ref GPUResources gpuResources)
{
    if (m_DstBuffer == null)
        return m_SrcBuffer;

    // Nothing to copy from an empty source.
    if (m_SrcBuffer.instanceNumInfo.GetTotalInstanceNum() == 0)
        return m_DstBuffer;

    Assert.IsTrue(m_SrcBuffer.perInstanceComponentCount == m_DstBuffer.perInstanceComponentCount);

    gpuResources.CreateResources();

    ComputeShader copyShader = gpuResources.cs;
    int kernel = gpuResources.kernelId;

    // Bindings shared by all dispatches.
    copyShader.SetInt(CopyInstancesKernelIDs._InputValidComponentCounts, m_SrcBuffer.perInstanceComponentCount);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._ValidComponentIndices, m_SrcBuffer.validComponentsIndicesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._ComponentByteCounts, m_SrcBuffer.componentByteCountsGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputComponentAddresses, m_SrcBuffer.componentAddressesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputComponentInstanceIndexRanges, m_SrcBuffer.componentInstanceIndexRangesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._OutputComponentAddresses, m_DstBuffer.componentAddressesGpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._InputBuffer, m_SrcBuffer.gpuBuffer);
    copyShader.SetBuffer(kernel, CopyInstancesKernelIDs._OutputBuffer, m_DstBuffer.gpuBuffer);

    //@ The new instance indices could be computed on the CPU so that a single dispatch is enough.
    //@ Alternatively, these dispatches could in theory overlap without a UAV barrier between them, because they write to different parts of the UAV.
    //@ This needs profiling to find out which is better.
    int typeCount = (int)InstanceType.Count;
    for (int typeIndex = 0; typeIndex < typeCount; ++typeIndex)
    {
        int instanceCount = m_SrcBuffer.instanceNumInfo.GetInstanceNum((InstanceType)typeIndex);
        if (instanceCount <= 0)
            continue;

        // Each type is copied from its range in the source to its range in the destination.
        int sourceOffset = m_SrcBuffer.instancesNumPrefixSum[typeIndex];
        int destinationOffset = m_DstBuffer.instancesNumPrefixSum[typeIndex];

        copyShader.SetInt(CopyInstancesKernelIDs._InstanceCounts, instanceCount);
        copyShader.SetInt(CopyInstancesKernelIDs._InstanceOffset, sourceOffset);
        copyShader.SetInt(CopyInstancesKernelIDs._OutputInstanceOffset, destinationOffset);

        // One thread group is dispatched per 64 instances, rounded up.
        copyShader.Dispatch(kernel, (instanceCount + 63) / 64, 1, 1);
    }

    return m_DstBuffer;
}
// Intentionally empty: the grower releases nothing here. Neither the source buffer nor the
// buffer returned by SubmitToGpu is disposed by this method.
public void Dispose()
{
}
}
}