You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

297 lines
7.9 KiB

// Shader variants: either no keyword, or exactly one EMULATE_WAVE_SIZE_* keyword.
// The selected keyword is translated into THREADING_WAVE_SIZE further down in this file.
#pragma multi_compile _ EMULATE_WAVE_SIZE_8 EMULATE_WAVE_SIZE_16 EMULATE_WAVE_SIZE_32 EMULATE_WAVE_SIZE_64 EMULATE_WAVE_SIZE_128
// NOTE(review): UNITY_DEVICE_SUPPORTS_WAVE_ANY is not referenced anywhere in this file;
// presumably it is consumed by the included Threading library - confirm.
#pragma multi_compile _ UNITY_DEVICE_SUPPORTS_WAVE_ANY
// Warning: Keep this kernel order in sync with WaveKernel enum in WaveEmulationTests.cs
#pragma kernel kAllEqual
#pragma kernel kAllTrue
#pragma kernel kAnyTrue
#pragma kernel kBallot
#pragma kernel kBitAnd
#pragma kernel kBitOr
#pragma kernel kBitXor
#pragma kernel kCountBits
#pragma kernel kMax
#pragma kernel kMin
#pragma kernel kProduct
#pragma kernel kSum
#pragma kernel kGetLaneCount
#pragma kernel kGetLaneIndex
#pragma kernel kIsFirstLane
#pragma kernel kPrefixCountBits
#pragma kernel kPrefixProduct
#pragma kernel kPrefixSum
#pragma kernel kReadLaneAtBroadcast
#pragma kernel kReadLaneAtShuffle
#pragma kernel kReadLaneFirst
// Map the active EMULATE_WAVE_SIZE_* variant keyword to a numeric THREADING_WAVE_SIZE.
// When no keyword is active, THREADING_WAVE_SIZE is intentionally left undefined; the
// kGetLaneCount kernel and the forced-emulation check both test for that with defined().
#if EMULATE_WAVE_SIZE_8
#define THREADING_WAVE_SIZE 8
#elif EMULATE_WAVE_SIZE_16
#define THREADING_WAVE_SIZE 16
#elif EMULATE_WAVE_SIZE_32
#define THREADING_WAVE_SIZE 32
#elif EMULATE_WAVE_SIZE_64
#define THREADING_WAVE_SIZE 64
#elif EMULATE_WAVE_SIZE_128
#define THREADING_WAVE_SIZE 128
#endif
// Required to define before using the threading library.
// Also used as the X dimension of every kernel's numthreads attribute in this file.
#define THREADING_BLOCK_SIZE 128
// BUG: There are currently a few issues preventing SM6 intrinsics from working on all platforms.
// For now, we avoid testing SM6 intrinsics on those platforms. Once the issues are resolved, this workaround should be removed to improve test coverage.
//
// Issues:
// 1. The BitXor, Product, and PrefixProduct SM6 intrinsics fail to compile on Xbox One because they aren't supported.
// We could potentially emulate these if necessary, but they aren't particularly useful.
// For now, we just skip the native path tests on Xbox One.
//
// ENABLE_SM6_WORKAROUND is resolved to a literal 0 or 1 here. Defining it as an expression that
// contains `defined(...)` and then expanding it inside `#if` relies on behavior the C preprocessor
// rules leave undefined (clang-based compilers warn with -Wexpansion-to-defined), so the platform
// check is evaluated directly in the `#if` instead.
#if defined(SHADER_API_GAMECORE_XBOXONE) || defined(SHADER_API_XBOXONE)
#define ENABLE_SM6_WORKAROUND 1
#else
#define ENABLE_SM6_WORKAROUND 0
#endif
// Force emulation on whenever a specific wave size is provided, or when the SM6 workaround is active.
#if defined(THREADING_WAVE_SIZE) || ENABLE_SM6_WORKAROUND
#define THREADING_FORCE_WAVE_EMULATION
#endif
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Threading.hlsl"
// Short aliases for the threading library types used by every kernel below.
typedef Threading::Wave Wave;
typedef Threading::Group Group;
// Raw byte-addressed test input (read-only) and per-thread result output.
ByteAddressBuffer _Input;
RWByteAddressBuffer _Output;
// Macros
// Both macros expect a local variable named `group` (the kernel's Group parameter) to be in scope.
// DATA: the 32-bit word of _Input at byte offset dispatchID.x * 4 (the shift by 2 converts a
// word index to a byte address).
#define DATA ((uint)_Input.Load(group.dispatchID.x << 2))
// OUTPUT_ADDR: byte offset groupIndex * 4 into _Output. Note that this is indexed by groupIndex,
// whereas DATA is indexed by dispatchID.x.
#define OUTPUT_ADDR ((uint)(group.groupIndex << 2))
// Stores the result of Wave::AllEqual, applied to (input word - group thread index), as 0/1.
// NOTE(review): presumably the host fills _Input so this difference is constant across a wave - confirm in WaveEmulationTests.cs.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kAllEqual(Group group)
{
    Wave lanes = group.GetWave();

    const uint delta = DATA - group.groupIndex;
    const uint passed = (uint)lanes.AllEqual(delta);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores the result of Wave::AllTrue, applied to the per-thread predicate
// "input word equals group thread index", as 0/1.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kAllTrue(Group group)
{
    Wave lanes = group.GetWave();

    const bool matchesIndex = (DATA == group.groupIndex);
    const uint passed = (uint)lanes.AllTrue(matchesIndex);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores the result of Wave::AnyTrue as 0/1. The per-thread predicate is true when the input
// word, masked with (lane count - 1), equals half of the lane count.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kAnyTrue(Group group)
{
    Wave lanes = group.GetWave();

    const bool isMiddleLane = ((DATA & (lanes.GetLaneCount() - 1)) == (lanes.GetLaneCount() / 2));
    const uint passed = (uint)lanes.AnyTrue(isMiddleLane);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when the total number of bits set in the uint4 returned by Wave::Ballot equals the
// lane count, else 0. The ballot predicate is "input word equals group thread index".
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kBallot(Group group)
{
    Wave lanes = group.GetWave();

    const uint4 votes = lanes.Ballot(DATA == group.groupIndex);

    // Sum the set bits of all four 32-bit mask words.
    const uint4 bitsPerWord = uint4(countbits(votes.x), countbits(votes.y), countbits(votes.z), countbits(votes.w));
    const uint totalVotes = (bitsPerWord.x + bitsPerWord.y + bitsPerWord.z + bitsPerWord.w);

    const uint passed = (totalVotes == lanes.GetLaneCount());
    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores the value returned by Wave::And for the per-thread predicate
// "input word equals group thread index".
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kBitAnd(Group group)
{
    Wave lanes = group.GetWave();

    const uint passed = lanes.And(DATA == group.groupIndex);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::Or over the per-thread predicate "input word differs from group thread
// index" yields 0, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kBitOr(Group group)
{
    Wave lanes = group.GetWave();

    const uint passed = (lanes.Or(DATA != group.groupIndex) == 0);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::Xor over the per-thread predicate "input word differs from group thread
// index" yields 0, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kBitXor(Group group)
{
    Wave lanes = group.GetWave();

    const uint passed = (lanes.Xor(DATA != group.groupIndex) == 0);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::CountBits over the predicate "input word equals group thread index"
// equals the lane count, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kCountBits(Group group)
{
    Wave lanes = group.GetWave();

    const bool matchesIndex = (DATA == group.groupIndex);
    const uint passed = (lanes.CountBits(matchesIndex) == lanes.GetLaneCount());

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::Max over (input word masked with lane count - 1) equals lane count - 1,
// else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kMax(Group group)
{
    Wave lanes = group.GetWave();

    const uint waveMax = lanes.Max(DATA & (lanes.GetLaneCount() - 1));
    const uint passed = (waveMax == (lanes.GetLaneCount() - 1));

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::Min over (input word masked with lane count - 1) equals 0, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kMin(Group group)
{
    Wave lanes = group.GetWave();

    const uint waveMin = lanes.Min(DATA & (lanes.GetLaneCount() - 1));
    const uint passed = (waveMin == 0);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores the value returned by Wave::Product, where each thread contributes 1 if its input
// word equals its group thread index and 0 otherwise.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kProduct(Group group)
{
    Wave lanes = group.GetWave();

    const uint factor = (uint)(DATA == group.groupIndex);
    const uint passed = lanes.Product(factor);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::Sum equals the lane count, else 0. Each thread contributes 1 if its
// input word equals its group thread index and 0 otherwise.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kSum(Group group)
{
    Wave lanes = group.GetWave();

    const uint term = (uint)(DATA == group.groupIndex);
    const uint passed = (lanes.Sum(term) == lanes.GetLaneCount());

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::GetLaneCount matches the compile-time THREADING_WAVE_SIZE, else 0.
// Without a compile-time wave size the kernel always stores 1.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kGetLaneCount(Group group)
{
#if !defined(THREADING_WAVE_SIZE)
    // The wave size is unknown at compile time, so there is nothing to compare the lane count
    // against. Report success unconditionally so this case always passes.
    const uint passed = 1;
#else
    Wave lanes = group.GetWave();
    const uint passed = (lanes.GetLaneCount() == THREADING_WAVE_SIZE);
#endif
    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when the input word, masked with (lane count - 1), equals Wave::GetLaneIndex, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kGetLaneIndex(Group group)
{
    Wave lanes = group.GetWave();

    const uint expectedLane = (DATA & (lanes.GetLaneCount() - 1));
    const uint passed = (expectedLane == lanes.GetLaneIndex());

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores the XOR of "lane index is non-zero" and Wave::IsFirstLane, i.e. 1 when exactly one of
// the two holds.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kIsFirstLane(Group group)
{
    Wave lanes = group.GetWave();

    const uint passed = (lanes.GetLaneIndex() != 0) ^ lanes.IsFirstLane();

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::PrefixCountBits over the predicate "input word equals group thread index"
// equals the calling lane's index, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kPrefixCountBits(Group group)
{
    Wave lanes = group.GetWave();

    const bool matchesIndex = (DATA == group.groupIndex);
    const uint passed = (lanes.PrefixCountBits(matchesIndex) == lanes.GetLaneIndex());

    _Output.Store(OUTPUT_ADDR, passed);
}
// Checks Wave::PrefixProduct. Every eighth lane (index % 8 == 0) contributes 2 and all other
// lanes contribute 1. The prefix result multiplied by the lane's own value is compared against
// 2^((lane index / 8) + 1); 1 is stored on a match, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kPrefixProduct(Group group)
{
    Wave lanes = group.GetWave();

    const uint factor = ((lanes.GetLaneIndex() % 8) == 0) ? 2u : 1u;
    const uint wanted = (1u << ((lanes.GetLaneIndex() / 8u) + 1));

    // Fold this lane's own factor into the prefix result before comparing.
    const uint actual = (lanes.PrefixProduct(factor) * factor);
    const uint passed = (actual == wanted);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Checks Wave::PrefixSum. Each lane contributes its own lane index. The prefix result plus the
// lane's own value is compared against index * (index + 1) / 2; 1 is stored on a match, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kPrefixSum(Group group)
{
    Wave lanes = group.GetWave();

    const uint term = lanes.GetLaneIndex();
    const uint wanted = (lanes.GetLaneIndex() * (lanes.GetLaneIndex() + 1u)) / 2u;

    // Fold this lane's own term into the prefix result before comparing.
    const uint actual = (lanes.PrefixSum(term) + term);
    const uint passed = (actual == wanted);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Checks Wave::ReadLaneAt with the same source lane for every thread: every thread reads the
// masked input word from lane (lane count - 1) and stores 1 when the value read equals
// (lane count - 1), else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kReadLaneAtBroadcast(Group group)
{
    Wave lanes = group.GetWave();

    const uint maskedWord = DATA & (lanes.GetLaneCount() - 1);
    const uint fetched = lanes.ReadLaneAt(maskedWord, (lanes.GetLaneCount() - 1));
    const uint passed = (fetched == (lanes.GetLaneCount() - 1));

    _Output.Store(OUTPUT_ADDR, passed);
}
// Checks Wave::ReadLaneAt with a per-thread source lane: each thread uses its masked input word
// both as the value and as the lane to read from, and stores 1 when the value read equals its
// own masked word, else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kReadLaneAtShuffle(Group group)
{
    Wave lanes = group.GetWave();

    const uint maskedWord = DATA & (lanes.GetLaneCount() - 1);
    const uint fetched = lanes.ReadLaneAt(maskedWord, maskedWord);
    const uint passed = (fetched == maskedWord);

    _Output.Store(OUTPUT_ADDR, passed);
}
// Stores 1 when Wave::ReadLaneFirst over (input word masked with lane count - 1) returns 0,
// else 0.
[numthreads(THREADING_BLOCK_SIZE, 1, 1)]
void kReadLaneFirst(Group group)
{
    Wave lanes = group.GetWave();

    const uint fetched = lanes.ReadLaneFirst(DATA & (lanes.GetLaneCount() - 1));
    const uint passed = (fetched == 0);

    _Output.Store(OUTPUT_ADDR, passed);
}