in renderdoc/driver/vulkan/vk_postvs.cpp [77:1472]
static void ConvertToMeshOutputCompute(const ShaderReflection &refl,
const SPIRVPatchData &patchData, const rdcstr &entryName,
StorageMode storageMode, rdcarray<uint32_t> instDivisor,
const ActionDescription *action, uint32_t numVerts,
uint32_t numViews, uint32_t baseSpecConstant,
rdcarray<uint32_t> &modSpirv, uint32_t &bufStride)
{
rdcspv::Editor editor(modSpirv);
editor.Prepare();
uint32_t numInputs = (uint32_t)refl.inputSignature.size();
uint32_t numOutputs = (uint32_t)refl.outputSignature.size();
RDCASSERT(numOutputs > 0);
if(storageMode == Binding)
{
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
// we will use descriptor set 0 bindings 0..N for our own purposes when not using buffer
// device address.
//
// Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
// redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
// where all descriptor sets are already used. In theory we only have to do this with set 0,
// but that requires knowing which variables are in set 0 and it's simpler to increase all
// bindings.
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate dec(it);
if(dec.decoration == rdcspv::Decoration::Binding)
{
RDCASSERT(dec.decoration.binding < (0xffffffff - MeshOutputReservedBindings));
dec.decoration.binding += MeshOutputReservedBindings;
it = dec;
}
}
}
}
// Per-attribute bookkeeping for one entry in the shader's input or output signature. Holds the
// SPIR-V IDs (variable, index constant and types) that the patching code below needs for that
// attribute. One of these is filled out per input (in 'ins') and per output (in 'outs').
struct inputOutputIDs
{
// if this is a builtin value, what builtin value is expected. Taken from the systemValue of the
// matching reflection signature element.
ShaderBuiltin builtin = ShaderBuiltin::Undefined;
// ID of the variable itself. This is the original Input/Output pointer variable that we convert
// to a private pointer
rdcspv::Id variable;
// constant ID for the index of this attribute
rdcspv::Id indexConst;
// the 32-bit gvec4 type used when fetching this input. Only filled out for inputs. This is the
// same as vec4Type unless the input is declared as an 8-bit or 16-bit type, in which case it is
// the 32-bit vec4/ivec4/uvec4 with matching float/signed/unsigned-ness.
rdcspv::Id fetchVec4Type;
// the actual gvec4 type for the attribute as declared, possibly needing conversion from
// fetchVec4Type if it's declared as an 8-bit or 16-bit type since we always fetch 32-bit. For
// 64-bit inputs this is uvec4, as the values are loaded as uvec4 and then packed in pairs.
rdcspv::Id vec4Type;
// the base type for this attribute. Must be present already by definition! This is the
// attribute's declared scalar type, as a vector with the correct number of components when
// there is more than one component, or the plain scalar otherwise.
rdcspv::Id baseType;
// Pointer type ID to baseType in the output buffer's storage class (storage buffer or physical
// storage buffer depending on the storage mode). Only filled out for outputs, to write to the
// output buffer.
rdcspv::Id ssboPtrType;
// Private-storage pointer type ID to baseType for this attribute.
// For inputs, used to 'write' to the global at the start.
// For outputs, used to 'read' from the global at the end.
rdcspv::Id privatePtrType;
};
rdcarray<inputOutputIDs> ins;
ins.resize(numInputs);
rdcarray<inputOutputIDs> outs;
outs.resize(numOutputs);
std::set<rdcspv::Id> inputs;
std::set<rdcspv::Id> outputs;
std::map<rdcspv::Id, rdcspv::Id> typeReplacements;
// keep track of any builtins we're preserving
std::set<rdcspv::Id> builtinKeeps;
// detect builtin inputs or outputs, and remove builtin decorations
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate decorate(it);
// remove any builtin decorations
if(decorate.decoration == rdcspv::Decoration::BuiltIn)
{
// subgroup builtins can be allowed to stay
if(decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupEqMask ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupGtMask ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupGeMask ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLtMask ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLeMask ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLocalInvocationId ||
decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupSize)
{
builtinKeeps.insert(decorate.target);
continue;
}
// we don't have to do anything, the ID mapping is in the rdcspv::PatchData, so just discard
// the builtin decoration
editor.Remove(it);
}
// remove all invariant decorations
else if(decorate.decoration == rdcspv::Decoration::Invariant)
{
editor.Remove(it);
}
// remove all index decorations
else if(decorate.decoration == rdcspv::Decoration::Index)
{
editor.Remove(it);
}
// same with flat/noperspective
else if(decorate.decoration == rdcspv::Decoration::Flat ||
decorate.decoration == rdcspv::Decoration::NoPerspective)
{
editor.Remove(it);
}
else if(decorate.decoration == rdcspv::Decoration::Location ||
decorate.decoration == rdcspv::Decoration::Component)
{
// we don't have to do anything, the ID mapping is in the rdcspv::PatchData, so just discard
// the location information
editor.Remove(it);
}
}
if(it.opcode() == rdcspv::Op::MemberDecorate)
{
rdcspv::OpMemberDecorate memberDecorate(it);
if(memberDecorate.decoration == rdcspv::Decoration::BuiltIn)
editor.Remove(it);
}
}
// rewrite any inputs and outputs to be private storage class
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::TypesVariablesConstants),
end = editor.End(rdcspv::Section::TypesVariablesConstants);
it < end; ++it)
{
// rewrite any input/output pointer types and variables to private, and build up inputs/outputs list
if(it.opcode() == rdcspv::Op::TypePointer)
{
rdcspv::OpTypePointer ptr(it);
rdcspv::Id id;
if(ptr.storageClass == rdcspv::StorageClass::Input)
{
id = ptr.result;
inputs.insert(id);
}
else if(ptr.storageClass == rdcspv::StorageClass::Output)
{
id = ptr.result;
outputs.insert(id);
rdcspv::Iter baseIt = editor.GetID(ptr.type);
if(baseIt && baseIt.opcode() == rdcspv::Op::TypeStruct)
outputs.insert(ptr.type);
}
else if(ptr.storageClass == rdcspv::StorageClass::Private ||
ptr.storageClass == rdcspv::StorageClass::Function)
{
// with variable pointers, we could have a private/function pointer into one of the pointer
// types we've replaced (e.g. Input and Output where one is patched to be private and the
// other is replaced since we deduplicate pointer types)
//
// we don't have to re-order the declaration, since we're iterating the types in order so
// the replacement is always earlier than the type it was replacing
if(typeReplacements.find(ptr.type) != typeReplacements.end())
{
editor.PreModify(it);
ptr.type = typeReplacements[ptr.type];
it = ptr;
// if we didn't already have this pointer, process the modified type declaration
editor.PostModify(it);
}
}
if(id)
{
rdcspv::Pointer privPtr(ptr.type, rdcspv::StorageClass::Private);
rdcspv::Id origId = editor.GetType(privPtr);
if(origId)
{
// if we already had a private pointer for this type, we have to use that type - we can't
// create a new type by aliasing. Thus we need to replace any uses of 'id' with 'origId'.
typeReplacements[id] = origId;
// and remove this type declaration
editor.Remove(it);
}
else
{
editor.PreModify(it);
ptr.storageClass = rdcspv::StorageClass::Private;
it = ptr;
// if we didn't already have this pointer, process the modified type declaration
editor.PostModify(it);
}
}
}
else if(it.opcode() == rdcspv::Op::Variable)
{
rdcspv::OpVariable var(it);
bool mod = false;
if(builtinKeeps.find(var.result) != builtinKeeps.end())
{
// if this variable is one we're keeping as a builtin, we need to do something different.
// We don't change its storage class, but we might need to redeclare the pointer as the
// right matching storage class (because it's been patched to private).
editor.PreModify(it);
rdcspv::Id ptrId = var.resultType;
// if this is in typeReplacements the id is no longer valid and was removed
auto replIt = typeReplacements.find(ptrId);
if(replIt != typeReplacements.end())
ptrId = replIt->second;
rdcspv::OpTypePointer ptr(editor.GetID(ptrId));
// declare if necessary the right pointer again, and use that as our type
var.resultType = editor.DeclareType(rdcspv::Pointer(ptr.type, var.storageClass));
it = var;
editor.PostModify(it);
// copy this variable declaration to the end of the section, after our potentially 'new'
// recreated pointer type
rdcspv::Operation op = rdcspv::Operation::copy(it);
editor.Remove(it);
editor.AddVariable(op);
// don't need to add this to the globals because if it needed to be in there it already was
// don't do any of the rest of the processing
continue;
}
else if(var.storageClass == rdcspv::StorageClass::Input)
{
mod = true;
editor.PreModify(it);
var.storageClass = rdcspv::StorageClass::Private;
inputs.insert(var.result);
}
else if(var.storageClass == rdcspv::StorageClass::Output)
{
mod = true;
editor.PreModify(it);
var.storageClass = rdcspv::StorageClass::Private;
outputs.insert(var.result);
}
auto replIt = typeReplacements.find(var.resultType);
if(replIt != typeReplacements.end())
{
if(!mod)
editor.PreModify(it);
mod = true;
var.resultType = replIt->second;
}
if(mod)
{
it = var;
editor.PostModify(it);
}
// if we repointed this variable to an existing private declaration, we must also move it to
// the end of the section. The reason being that the private pointer type declared may be
// declared *after* this variable. There can't be any dependencies on this later in the
// section because it's a variable not a type, so it's safe to move to the end.
if(replIt != typeReplacements.end())
{
// make a copy of the opcode
rdcspv::Operation op = rdcspv::Operation::copy(it);
// remove the old one
editor.Remove(it);
// add it anew
editor.AddVariable(op);
// don't need to add this to the globals because if it needed to be in there it already was
}
}
else if(it.opcode() == rdcspv::Op::TypeFunction)
{
rdcspv::OpTypeFunction func(it);
bool mod = false;
auto replIt = typeReplacements.find(func.result);
if(replIt != typeReplacements.end())
{
editor.PreModify(it);
mod = true;
func.result = replIt->second;
}
for(size_t i = 0; i < func.parameters.size(); i++)
{
replIt = typeReplacements.find(func.parameters[i]);
if(replIt != typeReplacements.end())
{
if(!mod)
editor.PreModify(it);
mod = true;
func.parameters[i] = replIt->second;
}
}
if(mod)
{
it = func;
editor.PostModify(it);
}
}
else if(it.opcode() == rdcspv::Op::ConstantNull)
{
rdcspv::OpConstantNull nullconst(it);
auto replIt = typeReplacements.find(nullconst.resultType);
if(replIt != typeReplacements.end())
{
editor.PreModify(it);
nullconst.resultType = replIt->second;
it = nullconst;
editor.PostModify(it);
}
}
else if(it.opcode() == rdcspv::Op::Undef)
{
rdcspv::OpUndef undef(it);
auto replIt = typeReplacements.find(undef.resultType);
if(replIt != typeReplacements.end())
{
editor.PreModify(it);
undef.resultType = replIt->second;
it = undef;
editor.PostModify(it);
}
}
}
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Functions); it; ++it)
{
// identify functions with result types we might want to replace
if(it.opcode() == rdcspv::Op::Function || it.opcode() == rdcspv::Op::FunctionParameter ||
it.opcode() == rdcspv::Op::Variable || it.opcode() == rdcspv::Op::AccessChain ||
it.opcode() == rdcspv::Op::InBoundsAccessChain || it.opcode() == rdcspv::Op::Bitcast ||
it.opcode() == rdcspv::Op::Undef || it.opcode() == rdcspv::Op::ExtInst ||
it.opcode() == rdcspv::Op::FunctionCall || it.opcode() == rdcspv::Op::Phi ||
it.opcode() == rdcspv::Op::Select)
{
editor.PreModify(it);
rdcspv::Id id = rdcspv::Id::fromWord(it.word(1));
auto replIt = typeReplacements.find(id);
if(replIt != typeReplacements.end())
id = replIt->second;
it.word(1) = id.value();
editor.PostModify(it);
}
}
rdcspv::Id entryID;
std::set<rdcspv::Id> entries;
for(const rdcspv::EntryPoint &entry : editor.GetEntries())
{
if(entry.name == entryName && entry.executionModel == rdcspv::ExecutionModel::Vertex)
entryID = entry.id;
entries.insert(entry.id);
}
RDCASSERT(entryID);
// tracks all the global variables we use, for compliance with SPIR-V 1.4.
rdcarray<rdcspv::Id> globals;
// we remove all entry points, we'll create one of our own.
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::EntryPoints),
end = editor.End(rdcspv::Section::EntryPoints);
it < end; ++it)
{
rdcspv::OpEntryPoint entry(it);
// when we find the entry point we're patching, grab its interface for the set of globals. We
// will be patching any Input/Output variables to private, but from SPIR-V 1.4 the interface
// needs to include privates as well.
if(entry.entryPoint == entryID)
globals = entry.iface;
editor.Remove(it);
}
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::ExecutionMode),
end = editor.End(rdcspv::Section::ExecutionMode);
it < end; ++it)
{
// this can also handle ExecutionModeId and we don't care about the difference
rdcspv::OpExecutionMode execMode(it);
if(execMode.entryPoint != entryID)
editor.Remove(it);
}
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::DebugNames),
end2 = editor.End(rdcspv::Section::DebugNames);
it < end2; ++it)
{
if(it.opcode() == rdcspv::Op::Name)
{
rdcspv::OpName name(it);
if(inputs.find(name.target) != inputs.end() || outputs.find(name.target) != outputs.end())
{
editor.Remove(it);
if(typeReplacements.find(name.target) == typeReplacements.end())
editor.SetName(name.target, "emulated_" + name.name);
}
// remove any OpName for the old entry points
if(entries.find(name.target) != entries.end())
editor.Remove(it);
// remove any OpName for deleted types
if(typeReplacements.find(name.target) != typeReplacements.end())
editor.Remove(it);
}
}
rdcspv::StorageClass bufferClass;
if(storageMode == Binding)
bufferClass = editor.StorageBufferClass();
else
bufferClass = rdcspv::StorageClass::PhysicalStorageBuffer;
// declare necessary variables per-output, types and constants. We do this last so that we don't
// add a private pointer that we later try and deduplicate when collapsing output/input pointers
// to private
for(uint32_t i = 0; i < numOutputs; i++)
{
inputOutputIDs &io = outs[i];
io.builtin = refl.outputSignature[i].systemValue;
// constant for this index
io.indexConst = editor.AddConstantImmediate(i);
io.variable = patchData.outputs[i].ID;
// base type - either a scalar or a vector, since matrix outputs are decayed to vectors
{
rdcspv::Scalar scalarType = rdcspv::scalar(refl.outputSignature[i].varType);
io.vec4Type = editor.DeclareType(rdcspv::Vector(scalarType, 4));
if(refl.outputSignature[i].compCount > 1)
io.baseType =
editor.DeclareType(rdcspv::Vector(scalarType, refl.outputSignature[i].compCount));
else
io.baseType = editor.DeclareType(scalarType);
}
io.ssboPtrType = editor.DeclareType(rdcspv::Pointer(io.baseType, bufferClass));
io.privatePtrType =
editor.DeclareType(rdcspv::Pointer(io.baseType, rdcspv::StorageClass::Private));
RDCASSERT(io.baseType && io.vec4Type && io.indexConst && io.privatePtrType && io.ssboPtrType,
io.baseType, io.vec4Type, io.indexConst, io.privatePtrType, io.ssboPtrType);
}
// repeat for inputs
for(uint32_t i = 0; i < numInputs; i++)
{
inputOutputIDs &io = ins[i];
io.builtin = refl.inputSignature[i].systemValue;
// constant for this index
io.indexConst = editor.AddConstantImmediate(i);
io.variable = patchData.inputs[i].ID;
VarType vType = refl.inputSignature[i].varType;
rdcspv::Scalar scalarType = rdcspv::scalar(vType);
// 64-bit values are loaded as uvec4 and then packed in pairs, so we need to declare vec4ID as
// uvec4
if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
{
io.fetchVec4Type = io.vec4Type =
editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
}
else
{
io.vec4Type = editor.DeclareType(rdcspv::Vector(scalarType, 4));
// if the underlying scalar is actually an 8-bit or 16-bit type, we fetch as the matching 32-bit type
switch(vType)
{
case VarType::Half:
io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 4));
break;
case VarType::SShort:
case VarType::SByte:
io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<int32_t>(), 4));
break;
case VarType::UShort:
case VarType::UByte:
io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
break;
default: io.fetchVec4Type = io.vec4Type; break;
}
}
if(refl.inputSignature[i].compCount > 1)
io.baseType = editor.DeclareType(rdcspv::Vector(scalarType, refl.inputSignature[i].compCount));
else
io.baseType = editor.DeclareType(scalarType);
io.privatePtrType =
editor.DeclareType(rdcspv::Pointer(io.baseType, rdcspv::StorageClass::Private));
RDCASSERT(io.baseType && io.vec4Type && io.indexConst && io.privatePtrType, io.baseType,
io.vec4Type, io.indexConst, io.privatePtrType);
}
rdcspv::Id u32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
rdcspv::Id uvec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
rdcspv::Id uvec4StructPtrType;
rdcspv::Id uintStructPtrType;
rdcspv::Id arraySize = editor.AddConstantImmediate<uint32_t>(MeshOutputBufferArraySize);
rdcspv::Id vbuffersVariable, ibufferVariable;
rdcarray<rdcspv::Id> vbufferSpecConsts;
rdcarray<rdcspv::Id> vbufferVariables;
vbufferSpecConsts.resize(MeshOutputBufferArraySize);
vbufferVariables.resize(MeshOutputBufferArraySize);
rdcspv::Id ibufferSpecConst;
rdcspv::Id outputSpecConst;
{
rdcspv::Id runtimeArrayID =
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), uvec4Type));
editor.AddDecoration(rdcspv::OpDecorate(
runtimeArrayID,
rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t) * 4)));
rdcspv::Id uvec4StructType =
editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
editor.SetName(uvec4StructType, "__rd_uvec4Struct");
editor.AddDecoration(rdcspv::OpMemberDecorate(
uvec4StructType, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
uvec4StructPtrType = editor.DeclareType(rdcspv::Pointer(uvec4StructType, bufferClass));
editor.SetName(uvec4StructPtrType, "__rd_uvec4Struct_ptr");
runtimeArrayID = editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), u32Type));
editor.AddDecoration(rdcspv::OpDecorate(
runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));
rdcspv::Id uintStructType =
editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
editor.SetName(uintStructType, "__rd_uintStruct");
editor.AddDecoration(rdcspv::OpMemberDecorate(
uintStructType, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
uintStructPtrType = editor.DeclareType(rdcspv::Pointer(uintStructType, bufferClass));
editor.SetName(uintStructPtrType, "__rd_uintStruct_ptr");
if(storageMode == Binding)
{
editor.DecorateStorageBufferStruct(uvec4StructType);
editor.DecorateStorageBufferStruct(uintStructType);
rdcspv::Id structArrayType = editor.AddType(
rdcspv::OpTypeArray(editor.MakeId(), uvec4StructType,
editor.AddConstantImmediate<uint32_t>(MeshOutputBufferArraySize)));
rdcspv::Id vbuffersType = editor.DeclareType(rdcspv::Pointer(structArrayType, bufferClass));
vbuffersVariable = editor.MakeId();
editor.AddVariable(rdcspv::OpVariable(vbuffersType, vbuffersVariable, bufferClass));
editor.AddDecoration(rdcspv::OpDecorate(
vbuffersVariable, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
editor.AddDecoration(rdcspv::OpDecorate(
vbuffersVariable, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(2)));
globals.push_back(vbuffersVariable);
editor.SetName(vbuffersVariable, "__rd_vbuffers");
if(action->flags & ActionFlags::Indexed)
{
rdcspv::Id ibufferType = editor.DeclareType(rdcspv::Pointer(uintStructType, bufferClass));
ibufferVariable = editor.MakeId();
editor.AddVariable(rdcspv::OpVariable(ibufferType, ibufferVariable, bufferClass));
editor.AddDecoration(rdcspv::OpDecorate(
ibufferVariable, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
editor.AddDecoration(rdcspv::OpDecorate(
ibufferVariable, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(1)));
globals.push_back(ibufferVariable);
editor.SetName(ibufferVariable, "__rd_ibuffer");
}
}
else
{
editor.AddDecoration(rdcspv::OpDecorate(uvec4StructType, rdcspv::Decoration::Block));
editor.AddDecoration(rdcspv::OpDecorate(uintStructType, rdcspv::Decoration::Block));
// add the extension
editor.AddExtension(storageMode == KHR_bda ? "SPV_KHR_physical_storage_buffer"
: "SPV_EXT_physical_storage_buffer");
// change the memory model to physical storage buffer 64
rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
rdcspv::OpMemoryModel model(it);
model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
it = model;
// add capabilities
editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
if(storageMode == EXT_bda)
editor.AddCapability(rdcspv::Capability::Int64);
for(uint32_t i = 0; i <= MeshOutputBufferArraySize + 1; i++)
{
rdcspv::Id *dstId = NULL;
if(i < MeshOutputBufferArraySize)
dstId = &vbufferSpecConsts[i];
else if(i == MeshOutputBufferArraySize)
dstId = &ibufferSpecConst;
else if(i == MeshOutputBufferArraySize + 1)
dstId = &outputSpecConst;
if(!dstId)
break;
if(storageMode == KHR_bda)
{
rdcspv::Id addressConstantLSB =
editor.AddSpecConstantImmediate<uint32_t>(0U, baseSpecConstant + i * 2 + 0);
rdcspv::Id addressConstantMSB =
editor.AddSpecConstantImmediate<uint32_t>(0U, baseSpecConstant + i * 2 + 1);
rdcspv::Id uint2 = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
*dstId = editor.AddConstant(rdcspv::OpSpecConstantComposite(
uint2, editor.MakeId(), {addressConstantLSB, addressConstantMSB}));
}
else
{
*dstId = editor.AddSpecConstantImmediate<uint64_t>(0ULL, baseSpecConstant + i * 2);
}
if(i == MeshOutputBufferArraySize)
editor.SetName(*dstId, "__rd_ibufferConst");
else
editor.SetName(*dstId, StringFormat::Fmt("__rd_vbufferConst%u", i));
}
}
}
rdcspv::Id uvec4PtrType = editor.DeclareType(rdcspv::Pointer(uvec4Type, bufferClass));
rdcspv::Id uintPtrType = editor.DeclareType(rdcspv::Pointer(u32Type, bufferClass));
if(numInputs > 0)
{
editor.AddCapability(rdcspv::Capability::SampledBuffer);
}
rdcspv::Id outBufferVarID;
rdcspv::Id outputStructPtrType;
rdcspv::Id numVertsConstID = editor.AddConstantImmediate<uint32_t>(numVerts);
rdcspv::Id numInstConstID = editor.AddConstantImmediate<uint32_t>(action->numInstances);
rdcspv::Id numViewsConstID = editor.AddConstantImmediate<uint32_t>(numViews);
editor.SetName(numVertsConstID, "numVerts");
editor.SetName(numInstConstID, "numInsts");
editor.SetName(numViewsConstID, "numViews");
// declare the output buffer and its type
{
rdcarray<rdcspv::Id> members;
for(uint32_t o = 0; o < numOutputs; o++)
members.push_back(outs[o].baseType);
// struct vertex { ... outputs };
rdcspv::Id vertStructID = editor.DeclareStructType(members);
editor.SetName(vertStructID, "vertex_struct");
// vertex vertArray[];
rdcspv::Id runtimeArrayID =
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), vertStructID));
editor.SetName(runtimeArrayID, "vertex_array");
uint32_t memberOffset = 0;
for(uint32_t o = 0; o < numOutputs; o++)
{
uint32_t elemSize = RDCMAX(4U, VarTypeByteSize(refl.outputSignature[o].varType));
uint32_t numComps = refl.outputSignature[o].compCount;
// ensure member is std430 packed (vec4 alignment for vec3/vec4)
if(numComps == 2)
memberOffset = AlignUp(memberOffset, 2U * elemSize);
else if(numComps > 2)
memberOffset = AlignUp(memberOffset, 4U * elemSize);
// apply decoration to each member in the struct with its offset in the struct
editor.AddDecoration(rdcspv::OpMemberDecorate(
vertStructID, o, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(memberOffset)));
memberOffset += elemSize * refl.outputSignature[o].compCount;
}
// align to 16 bytes (vec4) since we will almost certainly have
// a vec4 in the struct somewhere, and even in std430 alignment,
// the base struct alignment is still the largest base alignment
// of any member
bufStride = AlignUp16(memberOffset);
// struct meshOutput { vertex vertArray[]; };
rdcspv::Id outputStructID = editor.DeclareStructType({runtimeArrayID});
editor.SetName(outputStructID, "meshOutput");
// meshOutput *
outputStructPtrType = editor.DeclareType(rdcspv::Pointer(outputStructID, bufferClass));
editor.SetName(outputStructPtrType, "meshOutput_ptr");
// the array is the only element in the output struct, so
// it's at offset 0
editor.AddDecoration(rdcspv::OpMemberDecorate(
outputStructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
// set array stride
editor.AddDecoration(rdcspv::OpDecorate(
runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(bufStride)));
if(storageMode == Binding)
{
// meshOutput *outputData;
outBufferVarID =
editor.AddVariable(rdcspv::OpVariable(outputStructPtrType, editor.MakeId(), bufferClass));
editor.SetName(outBufferVarID, "outputData");
globals.push_back(outBufferVarID);
editor.DecorateStorageBufferStruct(outputStructID);
// set binding
editor.AddDecoration(rdcspv::OpDecorate(
outBufferVarID, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
editor.AddDecoration(rdcspv::OpDecorate(
outBufferVarID, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));
}
else
{
editor.AddDecoration(rdcspv::OpDecorate(outputStructID, rdcspv::Decoration::Block));
}
}
rdcspv::Id uint32Vec3ID = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 3));
rdcspv::Id invocationPtr =
editor.DeclareType(rdcspv::Pointer(uint32Vec3ID, rdcspv::StorageClass::Input));
rdcspv::Id invocationId = editor.AddVariable(
rdcspv::OpVariable(invocationPtr, editor.MakeId(), rdcspv::StorageClass::Input));
editor.AddDecoration(rdcspv::OpDecorate(
invocationId,
rdcspv::DecorationParam<rdcspv::Decoration::BuiltIn>(rdcspv::BuiltIn::GlobalInvocationId)));
globals.push_back(invocationId);
editor.SetName(invocationId, "rdoc_invocation");
// make a new entry point that will call the old function, then when it returns extract & write
// the outputs.
rdcspv::Id wrapperEntry = editor.MakeId();
// don't set a debug name, as some drivers get confused when this doesn't match the entry point
// name :(.
// editor.SetName(wrapperEntry, "RenderDoc_MeshFetch_Wrapper_Entrypoint");
// if we're not using all globals, this is only Input variables so only our invocation Id and any
// builtins we kept
if(!editor.EntryPointAllGlobals())
{
globals = {invocationId};
for(rdcspv::Id id : builtinKeeps)
globals.push_back(id);
}
// insert the new patched entry point with the globals
editor.AddOperation(editor.Begin(rdcspv::Section::EntryPoints),
rdcspv::OpEntryPoint(rdcspv::ExecutionModel::GLCompute, wrapperEntry,
PatchedMeshOutputEntryPoint, globals));
// Strip away any execution modes from the original shaders
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::ExecutionMode);
it < editor.End(rdcspv::Section::ExecutionMode); ++it)
{
if(it.opcode() == rdcspv::Op::ExecutionMode)
{
rdcspv::OpExecutionMode execMode(it);
// We only need to be cautious about what we are stripping for the entry
// that we are actually translating, the rest aren't used anyways.
if(execMode.entryPoint == entryID)
{
// Lets check to make sure we don't blindly strip away execution modes that
// might actually have an impact on the behaviour of the shader.
switch(execMode.mode)
{
// these execution modes should be applied to our entry point
case rdcspv::ExecutionMode::DenormPreserve:
case rdcspv::ExecutionMode::DenormFlushToZero:
case rdcspv::ExecutionMode::SignedZeroInfNanPreserve:
case rdcspv::ExecutionMode::RoundingModeRTE:
case rdcspv::ExecutionMode::RoundingModeRTZ:
case rdcspv::ExecutionMode::SubgroupUniformControlFlowKHR:
editor.AddExecutionMode(rdcspv::OpExecutionMode(
wrapperEntry, rdcspv::ExecutionModeAndParamData(execMode.mode)));
break;
case rdcspv::ExecutionMode::Xfb: break;
default: RDCERR("Unexpected execution mode");
}
}
editor.Remove(it);
}
}
// Add our compute shader execution mode
editor.AddExecutionMode(rdcspv::OpExecutionMode(
wrapperEntry,
rdcspv::ExecutionModeParam<rdcspv::ExecutionMode::LocalSize>(MeshOutputDispatchWidth, 1, 1)));
rdcspv::Id zero = editor.AddConstantImmediate<uint32_t>(0);
rdcspv::MemoryAccessAndParamDatas memoryAccess;
// add the wrapper function
{
rdcspv::OperationList ops;
rdcspv::Id voidType = editor.DeclareType(rdcspv::scalar<void>());
rdcspv::Id funcType = editor.DeclareType(rdcspv::FunctionType(voidType, {}));
ops.add(rdcspv::OpFunction(voidType, wrapperEntry, rdcspv::FunctionControl::None, funcType));
ops.add(rdcspv::OpLabel(editor.MakeId()));
{
// convert the pointers here
if(storageMode != Binding)
{
memoryAccess.setAligned(sizeof(uint32_t));
if(ibufferSpecConst != rdcspv::Id())
{
// if we don't have the struct as a bind, we need to cast it from the pointer. In
// KHR_buffer_device_address we bitcast since we store it as a uint2
if(storageMode == KHR_bda)
ibufferVariable =
ops.add(rdcspv::OpBitcast(uintStructPtrType, editor.MakeId(), ibufferSpecConst));
else
ibufferVariable = ops.add(
rdcspv::OpConvertUToPtr(uintStructPtrType, editor.MakeId(), ibufferSpecConst));
editor.SetName(ibufferVariable, "__rd_ibuffer");
}
for(size_t s = 0; s < refl.inputSignature.size(); s++)
{
uint32_t idx = refl.inputSignature[s].regIndex;
if(vbufferSpecConsts[idx] != rdcspv::Id() && vbufferVariables[idx] == rdcspv::Id())
{
if(storageMode == KHR_bda)
vbufferVariables[idx] = ops.add(
rdcspv::OpBitcast(uvec4StructPtrType, editor.MakeId(), vbufferSpecConsts[idx]));
else
vbufferVariables[idx] = ops.add(rdcspv::OpConvertUToPtr(
uvec4StructPtrType, editor.MakeId(), vbufferSpecConsts[idx]));
editor.SetName(vbufferVariables[idx], StringFormat::Fmt("__rd_vbuffers[%u]", idx));
}
}
{
if(storageMode == KHR_bda)
outBufferVarID =
ops.add(rdcspv::OpBitcast(outputStructPtrType, editor.MakeId(), outputSpecConst));
else
outBufferVarID = ops.add(
rdcspv::OpConvertUToPtr(outputStructPtrType, editor.MakeId(), outputSpecConst));
editor.SetName(outBufferVarID, "__rd_outbuf");
}
}
// uint3 invocationVec = gl_GlobalInvocationID;
rdcspv::Id invocationVector =
ops.add(rdcspv::OpLoad(uint32Vec3ID, editor.MakeId(), invocationId));
// uint invocation = invocationVec.x
rdcspv::Id uintInvocationID =
ops.add(rdcspv::OpCompositeExtract(u32Type, editor.MakeId(), invocationVector, {0U}));
// arraySlotID = uintInvocationID;
rdcspv::Id arraySlotID = uintInvocationID;
editor.SetName(uintInvocationID, "arraySlot");
// uint viewinst = uintInvocationID / numVerts
rdcspv::Id viewinstID =
ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), uintInvocationID, numVertsConstID));
editor.SetName(viewinstID, "viewInstance");
rdcspv::Id instID =
ops.add(rdcspv::OpUMod(u32Type, editor.MakeId(), viewinstID, numInstConstID));
editor.SetName(instID, "instanceID");
rdcspv::Id viewID =
ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), viewinstID, numInstConstID));
editor.SetName(viewID, "viewID");
// bool inBounds = viewID < numViews;
rdcspv::Id inBounds = ops.add(rdcspv::OpULessThan(editor.DeclareType(rdcspv::scalar<bool>()),
editor.MakeId(), viewID, numViewsConstID));
// if(inBounds) goto continueLabel; else goto killLabel;
rdcspv::Id killLabel = editor.MakeId();
rdcspv::Id continueLabel = editor.MakeId();
ops.add(rdcspv::OpSelectionMerge(killLabel, rdcspv::SelectionControl::None));
ops.add(rdcspv::OpBranchConditional(inBounds, continueLabel, killLabel));
// continueLabel:
ops.add(rdcspv::OpLabel(continueLabel));
// uint vtx = uintInvocationID % numVerts
rdcspv::Id vtxID =
ops.add(rdcspv::OpUMod(u32Type, editor.MakeId(), uintInvocationID, numVertsConstID));
editor.SetName(vtxID, "vertexID");
rdcspv::Id vertexIndexID = vtxID;
// if we're indexing, look up the index buffer. We don't have to apply vertexOffset - it was
// already applied when we read back and uniq-ified the index buffer.
if(action->flags & ActionFlags::Indexed)
{
rdcspv::Id idxPtr;
// idxptr = &ibuffer.member0[vertexIndex]
idxPtr = ops.add(rdcspv::OpAccessChain(uintPtrType, editor.MakeId(), ibufferVariable,
{zero, vertexIndexID}));
// vertexIndex = *idxptr
vertexIndexID = ops.add(rdcspv::OpLoad(u32Type, editor.MakeId(), idxPtr, memoryAccess));
}
// we use the current value of vertexIndex and instID to look up per-vertex and per-instance
// attributes. This is because when we fetched the vertex data, we advanced by vertexOffset
// (in non-indexed draws) and by instanceOffset, rather than fetching data that's only used
// as padding skipped over by these offsets.
rdcspv::Id vertexLookupID = vertexIndexID;
rdcspv::Id instanceLookupID = instID;
if(!(action->flags & ActionFlags::Indexed))
{
// for non-indexed draws, we manually apply the vertex offset, but here after we used the
// 0-based one to calculate the array slot
vertexIndexID =
ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), vtxID,
editor.AddConstantImmediate<uint32_t>(action->vertexOffset)));
}
editor.SetName(vertexIndexID, "vertexIndex");
// instIndex = inst + instOffset
rdcspv::Id instIndexID =
ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), instID,
editor.AddConstantImmediate<uint32_t>(action->instanceOffset)));
editor.SetName(instIndexID, "instanceIndex");
rdcspv::Id idxs[64] = {};
for(size_t i = 0; i < refl.inputSignature.size(); i++)
{
VarType vType = refl.inputSignature[i].varType;
ShaderBuiltin builtin = refl.inputSignature[i].systemValue;
if(builtin != ShaderBuiltin::Undefined)
{
rdcspv::Id valueID;
CompType compType = CompType::UInt;
if(builtin == ShaderBuiltin::VertexIndex)
{
valueID = vertexIndexID;
// although for indexed draws we accounted for vertexOffset when looking up fixed
// function vertex inputs, we still need to apply it to the VertexIndex builtin here.
if(action->flags & ActionFlags::Indexed)
{
valueID = ops.add(
rdcspv::OpIAdd(u32Type, editor.MakeId(), valueID,
editor.AddConstantImmediate<uint32_t>(action->vertexOffset)));
}
}
else if(builtin == ShaderBuiltin::InstanceIndex)
{
valueID = instIndexID;
}
else if(builtin == ShaderBuiltin::MultiViewIndex)
{
valueID = viewID;
}
else if(builtin == ShaderBuiltin::ViewportIndex)
{
valueID = viewID;
}
else if(builtin == ShaderBuiltin::BaseVertex)
{
if(action->flags & ActionFlags::Indexed)
{
valueID = editor.AddConstantImmediate<uint32_t>(action->vertexOffset);
}
else
{
valueID = editor.AddConstantImmediate<int32_t>(action->baseVertex);
compType = CompType::SInt;
}
}
else if(builtin == ShaderBuiltin::BaseInstance)
{
valueID = editor.AddConstantImmediate<uint32_t>(action->instanceOffset);
}
else if(builtin == ShaderBuiltin::DrawIndex)
{
valueID = editor.AddConstantImmediate<uint32_t>(action->drawIndex);
}
else if(builtin == ShaderBuiltin::SubgroupEqualMask ||
builtin == ShaderBuiltin::SubgroupGreaterMask ||
builtin == ShaderBuiltin::SubgroupGreaterEqualMask ||
builtin == ShaderBuiltin::SubgroupLessMask ||
builtin == ShaderBuiltin::SubgroupLessEqualMask ||
builtin == ShaderBuiltin::IndexInSubgroup || builtin == ShaderBuiltin::SubgroupSize)
{
// subgroup builtins we left alone, these are still builtins
continue;
}
if(valueID)
{
rdcspv::Id ptr = ins[i].variable;
if(!patchData.inputs[i].accessChain.empty())
{
// for composite types we need to access chain first
rdcarray<rdcspv::Id> chain;
for(uint32_t accessIdx : patchData.inputs[i].accessChain)
{
if(idxs[accessIdx] == 0)
idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);
chain.push_back(idxs[accessIdx]);
}
ptr = ops.add(rdcspv::OpAccessChain(ins[i].privatePtrType, editor.MakeId(),
patchData.inputs[i].ID, chain));
}
if(VarTypeCompType(vType) == compType)
{
ops.add(rdcspv::OpStore(ptr, valueID));
}
else
{
// assume we can just bitcast
rdcspv::Id castedValue =
ops.add(rdcspv::OpBitcast(ins[i].baseType, editor.MakeId(), valueID));
ops.add(rdcspv::OpStore(ptr, castedValue));
}
}
else
{
RDCERR("Unsupported/unsupported built-in input %s", ToStr(builtin).c_str());
}
}
else
{
if(idxs[i] == 0)
idxs[i] = editor.AddConstantImmediate<uint32_t>((uint32_t)i);
if(idxs[refl.inputSignature[i].regIndex] == 0)
idxs[refl.inputSignature[i].regIndex] =
editor.AddConstantImmediate<uint32_t>(refl.inputSignature[i].regIndex);
uint32_t location = refl.inputSignature[i].regIndex;
// idx = vertexIndex
rdcspv::Id idx = vertexLookupID;
// maybe idx = instanceIndex / someDivisor
if(location < instDivisor.size())
{
uint32_t divisor = instDivisor[location];
if(divisor == ~0U)
{
// this magic value indicates vertex-rate data
idx = vertexLookupID;
}
else if(divisor == 0)
{
// if the divisor is 0, all instances read the first value.
idx = editor.AddConstantImmediate<uint32_t>(0);
}
else if(divisor == 1)
{
// if the divisor is 1, it's just regular instancing
idx = instanceLookupID;
}
else
{
// otherwise we divide by the divisor
rdcspv::Id divisorId = editor.AddConstantImmediate<uint32_t>(divisor);
idx = ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), instanceLookupID, divisorId));
}
}
if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
{
// since 64-bit values are packed into two uints, we need to multiply the index by two
idx = ops.add(rdcspv::OpIMul(u32Type, editor.MakeId(), idx,
editor.AddConstantImmediate<uint32_t>(2)));
}
rdcspv::Id ptrId;
// when we're loading from bindings, the vbuffers variable is an array of N structs each
// containing uvec4[],
// when we're using buffer device address we have one variable per vbuffer and it's a
// plain uvec4*
// uvec4 *vertex = &vbuffers[reg].member0[idx]
if(storageMode == Binding)
ptrId =
ops.add(rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(), vbuffersVariable,
{idxs[refl.inputSignature[i].regIndex], zero, idx}));
else
// uvec4 *vertex = &vbufferN.member0[idx]
ptrId = ops.add(rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(),
vbufferVariables[refl.inputSignature[i].regIndex],
{zero, idx}));
// uvec4 result = *vertex
rdcspv::Id result =
ops.add(rdcspv::OpLoad(uvec4Type, editor.MakeId(), ptrId, memoryAccess));
// if we want this as ivec4 or vec4, bitcast now
if(ins[i].fetchVec4Type != uvec4Type)
result = ops.add(rdcspv::OpBitcast(ins[i].fetchVec4Type, editor.MakeId(), result));
// we always fetch as full 32-bit values, but if the input was declared as a different
// size (typically ushort or half) then convert here
if(ins[i].fetchVec4Type != ins[i].vec4Type)
{
if(VarTypeCompType(vType) == CompType::Float)
result = ops.add(rdcspv::OpFConvert(ins[i].vec4Type, editor.MakeId(), result));
else if(VarTypeCompType(vType) == CompType::UInt)
result = ops.add(rdcspv::OpUConvert(ins[i].vec4Type, editor.MakeId(), result));
else
result = ops.add(rdcspv::OpSConvert(ins[i].vec4Type, editor.MakeId(), result));
}
uint32_t comp = Bits::CountTrailingZeroes(uint32_t(refl.inputSignature[i].regChannelMask));
if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
{
// since 64-bit values are packed into two uints, we now need to fetch more data and do
// packing. We can fetch the data unconditionally since it's harmless to read out of the
// bounds of the buffer
rdcspv::Id nextidx = ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), idx,
editor.AddConstantImmediate<uint32_t>(1)));
// uvec4 *vertex = &vbuffers[reg].member0[nextidx]
if(storageMode == Binding)
ptrId = ops.add(
rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(), vbuffersVariable,
{idxs[refl.inputSignature[i].regIndex], zero, nextidx}));
else
// uvec4 *vertex = &vbufferN.member0[nextidx]
ptrId = ops.add(rdcspv::OpAccessChain(
uvec4PtrType, editor.MakeId(), vbufferVariables[refl.inputSignature[i].regIndex],
{zero, nextidx}));
rdcspv::Id result2 =
ops.add(rdcspv::OpLoad(uvec4Type, editor.MakeId(), ptrId, memoryAccess));
rdcspv::Id glsl450 = editor.ImportExtInst("GLSL.std.450");
rdcspv::Id uvec2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
rdcspv::Id comps[4] = {};
for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
{
// first extract the uvec2 we want
// uvec2 packed = result.[xy/zw] / result2.[xy/zw];
rdcspv::Id packed = ops.add(rdcspv::OpVectorShuffle(
uvec2Type, editor.MakeId(), result, result2, {c * 2 + 0, c * 2 + 1}));
char swizzle[] = "xyzw";
editor.SetName(packed, StringFormat::Fmt("packed_%c", swizzle[c]));
if(vType == VarType::Double)
{
// double comp = PackDouble2x32(packed);
comps[c] = ops.add(rdcspv::OpGLSL450(editor.DeclareType(rdcspv::scalar<double>()),
editor.MakeId(), glsl450,
rdcspv::GLSLstd450::PackDouble2x32, {packed}));
}
else
{
rdcspv::Scalar s = (vType == VarType::ULong) ? rdcspv::scalar<uint64_t>()
: rdcspv::scalar<int64_t>();
// [u]int64 comp = Bitcast(packed);
comps[c] = ops.add(rdcspv::OpBitcast(editor.DeclareType(s), editor.MakeId(), packed));
}
}
// if there's only one component it's ready, otherwise construct a vector
if(refl.inputSignature[i].compCount == 1)
{
result = comps[0];
}
else
{
rdcarray<rdcspv::Id> ids;
for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
ids.push_back(comps[c]);
// baseTypeN value = result.xyz;
result = ops.add(rdcspv::OpCompositeConstruct(ins[i].baseType, editor.MakeId(), ids));
}
}
else if(refl.inputSignature[i].compCount == 1)
{
// for one component, extract x
// baseType value = result.x;
result =
ops.add(rdcspv::OpCompositeExtract(ins[i].baseType, editor.MakeId(), result, {comp}));
}
else if(refl.inputSignature[i].compCount != 4)
{
// for less than 4 components, extract the sub-vector
rdcarray<uint32_t> swizzle;
for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
swizzle.push_back(c + comp);
// baseTypeN value = result.xyz;
result = ops.add(
rdcspv::OpVectorShuffle(ins[i].baseType, editor.MakeId(), result, result, swizzle));
}
// copy the 4 component result directly
// not a composite type, we can store directly
if(patchData.inputs[i].accessChain.empty())
{
// *global = value
ops.add(rdcspv::OpStore(ins[i].variable, result));
}
else
{
// for composite types we need to access chain first
rdcarray<rdcspv::Id> chain;
for(uint32_t accessIdx : patchData.inputs[i].accessChain)
{
if(idxs[accessIdx] == 0)
idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);
chain.push_back(idxs[accessIdx]);
}
rdcspv::Id subElement = ops.add(rdcspv::OpAccessChain(
ins[i].privatePtrType, editor.MakeId(), patchData.inputs[i].ID, chain));
ops.add(rdcspv::OpStore(subElement, result));
}
}
}
// real_main();
ops.add(rdcspv::OpFunctionCall(voidType, editor.MakeId(), entryID));
for(uint32_t o = 0; o < numOutputs; o++)
{
rdcspv::Id loaded;
// not a structure member or array child, can load directly
if(patchData.outputs[o].accessChain.empty())
{
// type loaded = *globalvar;
loaded =
ops.add(rdcspv::OpLoad(outs[o].baseType, editor.MakeId(), patchData.outputs[o].ID));
}
else
{
// structure member, need to access chain first
rdcarray<rdcspv::Id> chain;
for(uint32_t idx : patchData.outputs[o].accessChain)
{
if(idxs[idx] == 0)
idxs[idx] = editor.AddConstantImmediate<uint32_t>(idx);
chain.push_back(idxs[idx]);
}
// type *readPtr = globalvar.globalsub...;
rdcspv::Id readPtr = ops.add(rdcspv::OpAccessChain(
outs[o].privatePtrType, editor.MakeId(), patchData.outputs[o].ID, chain));
// type loaded = *readPtr;
loaded = ops.add(rdcspv::OpLoad(outs[o].baseType, editor.MakeId(), readPtr));
}
// access chain the destination
rdcspv::Id writePtr;
// type *writePtr = &outBuffer.verts[arraySlot].outputN
writePtr = ops.add(rdcspv::OpAccessChain(outs[o].ssboPtrType, editor.MakeId(), outBufferVarID,
{zero, arraySlotID, outs[o].indexConst}));
// *writePtr = loaded;
ops.add(rdcspv::OpStore(writePtr, loaded, memoryAccess));
}
// goto killLabel;
ops.add(rdcspv::OpBranch(killLabel));
// killLabel:
ops.add(rdcspv::OpLabel(killLabel));
}
ops.add(rdcspv::OpReturn());
ops.add(rdcspv::OpFunctionEnd());
editor.AddFunction(ops);
}
}