static void ConvertToMeshOutputCompute()

in renderdoc/driver/vulkan/vk_postvs.cpp [77:1472]


static void ConvertToMeshOutputCompute(const ShaderReflection &refl,
                                       const SPIRVPatchData &patchData, const rdcstr &entryName,
                                       StorageMode storageMode, rdcarray<uint32_t> instDivisor,
                                       const ActionDescription *action, uint32_t numVerts,
                                       uint32_t numViews, uint32_t baseSpecConstant,
                                       rdcarray<uint32_t> &modSpirv, uint32_t &bufStride)
{
  rdcspv::Editor editor(modSpirv);

  editor.Prepare();

  uint32_t numInputs = (uint32_t)refl.inputSignature.size();

  uint32_t numOutputs = (uint32_t)refl.outputSignature.size();
  RDCASSERT(numOutputs > 0);

  if(storageMode == Binding)
  {
    for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
                     end = editor.End(rdcspv::Section::Annotations);
        it < end; ++it)
    {
      // we will use descriptor set 0 bindings 0..N for our own purposes when not using buffer
      // device address.
      //
      // Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
      // redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
      // where all descriptor sets are already used. In theory we only have to do this with set 0,
      // but that requires knowing which variables are in set 0 and it's simpler to increase all
      // bindings.
      if(it.opcode() == rdcspv::Op::Decorate)
      {
        rdcspv::OpDecorate dec(it);
        if(dec.decoration == rdcspv::Decoration::Binding)
        {
          RDCASSERT(dec.decoration.binding < (0xffffffff - MeshOutputReservedBindings));
          dec.decoration.binding += MeshOutputReservedBindings;
          it = dec;
        }
      }
    }
  }

  // Per-attribute bookkeeping for one entry in the shader's input or output signature. Holds the
  // SPIR-V IDs (variable, index constant and types) that are declared up-front for that attribute
  // so they can be referenced when the patched module is built.
  struct inputOutputIDs
  {
    // if this is a builtin value, what builtin value is expected
    ShaderBuiltin builtin = ShaderBuiltin::Undefined;
    // ID of the variable itself. This is the original Input/Output pointer variable that we convert
    // to a private pointer
    rdcspv::Id variable;
    // constant ID for the index of this attribute within its signature array
    rdcspv::Id indexConst;
    // base gvec4 type for this input. We always fetch uvec4 from the buffer but then bitcast to
    // vec4 or ivec4 if needed. Only filled in for inputs - it is left as a null ID for outputs.
    rdcspv::Id fetchVec4Type;
    // the actual gvec4 type for the input, possibly needed to convert to from the above if it's
    // declared as a 16-bit type since we always fetch 32-bit. For 64-bit inputs this is uvec4, the
    // same as fetchVec4Type.
    rdcspv::Id vec4Type;
    // the base type for this attribute. Must be present already by definition! This is the same
    // scalar type as vec4Type but with the correct number of components.
    rdcspv::Id baseType;
    // Pointer type ID to baseType in the storage class of the output buffer (storage buffer or
    // physical storage buffer, depending on the storage mode). Used only for output data, to write
    // to the output SSBO - it is left as a null ID for inputs.
    rdcspv::Id ssboPtrType;
    // Private-storage-class pointer type ID to baseType for this attribute.
    // For inputs, used to 'write' to the global at the start.
    // For outputs, used to 'read' from the global at the end.
    rdcspv::Id privatePtrType;
  };

  rdcarray<inputOutputIDs> ins;
  ins.resize(numInputs);
  rdcarray<inputOutputIDs> outs;
  outs.resize(numOutputs);

  std::set<rdcspv::Id> inputs;
  std::set<rdcspv::Id> outputs;

  std::map<rdcspv::Id, rdcspv::Id> typeReplacements;

  // keep track of any builtins we're preserving
  std::set<rdcspv::Id> builtinKeeps;

  // detect builtin inputs or outputs, and remove builtin decorations
  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
                   end = editor.End(rdcspv::Section::Annotations);
      it < end; ++it)
  {
    if(it.opcode() == rdcspv::Op::Decorate)
    {
      rdcspv::OpDecorate decorate(it);
      // remove any builtin decorations
      if(decorate.decoration == rdcspv::Decoration::BuiltIn)
      {
        // subgroup builtins can be allowed to stay
        if(decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupEqMask ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupGtMask ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupGeMask ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLtMask ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLeMask ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupLocalInvocationId ||
           decorate.decoration.builtIn == rdcspv::BuiltIn::SubgroupSize)
        {
          builtinKeeps.insert(decorate.target);
          continue;
        }

        // we don't have to do anything, the ID mapping is in the rdcspv::PatchData, so just discard
        // the location information
        editor.Remove(it);
      }
      // remove all invariant decorations
      else if(decorate.decoration == rdcspv::Decoration::Invariant)
      {
        editor.Remove(it);
      }
      // remove all index decorations
      else if(decorate.decoration == rdcspv::Decoration::Index)
      {
        editor.Remove(it);
      }
      // same with flat/noperspective
      else if(decorate.decoration == rdcspv::Decoration::Flat ||
              decorate.decoration == rdcspv::Decoration::NoPerspective)
      {
        editor.Remove(it);
      }
      else if(decorate.decoration == rdcspv::Decoration::Location ||
              decorate.decoration == rdcspv::Decoration::Component)
      {
        // we don't have to do anything, the ID mapping is in the rdcspv::PatchData, so just discard
        // the location information
        editor.Remove(it);
      }
    }

    if(it.opcode() == rdcspv::Op::MemberDecorate)
    {
      rdcspv::OpMemberDecorate memberDecorate(it);
      if(memberDecorate.decoration == rdcspv::Decoration::BuiltIn)
        editor.Remove(it);
    }
  }

  // rewrite any inputs and outputs to be private storage class
  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::TypesVariablesConstants),
                   end = editor.End(rdcspv::Section::TypesVariablesConstants);
      it < end; ++it)
  {
    // rewrite any input/output variables to private, and build up inputs/outputs list
    if(it.opcode() == rdcspv::Op::TypePointer)
    {
      rdcspv::OpTypePointer ptr(it);

      rdcspv::Id id;

      if(ptr.storageClass == rdcspv::StorageClass::Input)
      {
        id = ptr.result;
        inputs.insert(id);
      }
      else if(ptr.storageClass == rdcspv::StorageClass::Output)
      {
        id = ptr.result;
        outputs.insert(id);

        rdcspv::Iter baseIt = editor.GetID(ptr.type);
        if(baseIt && baseIt.opcode() == rdcspv::Op::TypeStruct)
          outputs.insert(ptr.type);
      }
      else if(ptr.storageClass == rdcspv::StorageClass::Private ||
              ptr.storageClass == rdcspv::StorageClass::Function)
      {
        // with variable pointers, we could have a private/function pointer into one of the pointer
        // types we've replaced (e.g. Input and Output where one is patched to be private and the
        // other is replaced since we deduplicate pointer types)
        //
        // we don't have to re-order the declaration, since we're iterating the types in order so
        // the replacement is always earlier than the type it was replacing

        if(typeReplacements.find(ptr.type) != typeReplacements.end())
        {
          editor.PreModify(it);

          ptr.type = typeReplacements[ptr.type];
          it = ptr;

          // if we didn't already have this pointer, process the modified type declaration
          editor.PostModify(it);
        }
      }

      if(id)
      {
        rdcspv::Pointer privPtr(ptr.type, rdcspv::StorageClass::Private);

        rdcspv::Id origId = editor.GetType(privPtr);

        if(origId)
        {
          // if we already had a private pointer for this type, we have to use that type - we can't
          // create a new type by aliasing. Thus we need to replace any uses of 'id' with 'origId'.
          typeReplacements[id] = origId;

          // and remove this type declaration
          editor.Remove(it);
        }
        else
        {
          editor.PreModify(it);

          ptr.storageClass = rdcspv::StorageClass::Private;
          it = ptr;

          // if we didn't already have this pointer, process the modified type declaration
          editor.PostModify(it);
        }
      }
    }
    else if(it.opcode() == rdcspv::Op::Variable)
    {
      rdcspv::OpVariable var(it);

      bool mod = false;

      if(builtinKeeps.find(var.result) != builtinKeeps.end())
      {
        // if this variable is one we're keeping as a builtin, we need to do something different.
        // We don't change its storage class, but we might need to redeclare the pointer as the
        // right matching storage class (because it's been patched to private). This might be a
        // newly declared type, so the variable is moved to the end of the section below.
        editor.PreModify(it);

        rdcspv::Id ptrId = var.resultType;
        // if this is in typeReplacements the id is no longer valid and was removed
        auto replIt = typeReplacements.find(ptrId);
        if(replIt != typeReplacements.end())
          ptrId = replIt->second;

        rdcspv::OpTypePointer ptr(editor.GetID(ptrId));

        // declare if necessary the right pointer again, and use that as our type
        var.resultType = editor.DeclareType(rdcspv::Pointer(ptr.type, var.storageClass));

        it = var;
        editor.PostModify(it);

        // copy this variable declaration to the end of the section, after our potentially 'new'
        // recreated pointer type

        rdcspv::Operation op = rdcspv::Operation::copy(it);
        editor.Remove(it);
        editor.AddVariable(op);
        // don't need to add this to the globals because if it needed to be in there it already was

        // don't do any of the rest of the processing
        continue;
      }
      else if(var.storageClass == rdcspv::StorageClass::Input)
      {
        mod = true;
        editor.PreModify(it);

        var.storageClass = rdcspv::StorageClass::Private;

        inputs.insert(var.result);
      }
      else if(var.storageClass == rdcspv::StorageClass::Output)
      {
        mod = true;
        editor.PreModify(it);

        var.storageClass = rdcspv::StorageClass::Private;

        outputs.insert(var.result);
      }

      auto replIt = typeReplacements.find(var.resultType);
      if(replIt != typeReplacements.end())
      {
        if(!mod)
          editor.PreModify(it);
        mod = true;
        var.resultType = replIt->second;
      }

      if(mod)
      {
        it = var;
        editor.PostModify(it);
      }

      // if we repointed this variable to an existing private declaration, we must also move it to
      // the end of the section. The reason being that the private pointer type declared may be
      // declared *after* this variable. There can't be any dependencies on this later in the
      // section because it's a variable not a type, so it's safe to move to the end.
      if(replIt != typeReplacements.end())
      {
        // make a copy of the opcode
        rdcspv::Operation op = rdcspv::Operation::copy(it);
        // remove the old one
        editor.Remove(it);
        // add it anew
        editor.AddVariable(op);
        // don't need to add this to the globals because if it needed to be in there it already was
      }
    }
    else if(it.opcode() == rdcspv::Op::TypeFunction)
    {
      rdcspv::OpTypeFunction func(it);

      bool mod = false;

      auto replIt = typeReplacements.find(func.result);
      if(replIt != typeReplacements.end())
      {
        editor.PreModify(it);
        mod = true;
        func.result = replIt->second;
      }

      for(size_t i = 0; i < func.parameters.size(); i++)
      {
        replIt = typeReplacements.find(func.parameters[i]);
        if(replIt != typeReplacements.end())
        {
          if(!mod)
            editor.PreModify(it);
          mod = true;
          func.parameters[i] = replIt->second;
        }
      }

      if(mod)
      {
        it = func;
        editor.PostModify(it);
      }
    }
    else if(it.opcode() == rdcspv::Op::ConstantNull)
    {
      rdcspv::OpConstantNull nullconst(it);

      auto replIt = typeReplacements.find(nullconst.resultType);
      if(replIt != typeReplacements.end())
      {
        editor.PreModify(it);
        nullconst.resultType = replIt->second;
        it = nullconst;
        editor.PostModify(it);
      }
    }
    else if(it.opcode() == rdcspv::Op::Undef)
    {
      rdcspv::OpUndef undef(it);

      auto replIt = typeReplacements.find(undef.resultType);
      if(replIt != typeReplacements.end())
      {
        editor.PreModify(it);
        undef.resultType = replIt->second;
        it = undef;
        editor.PostModify(it);
      }
    }
  }

  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Functions); it; ++it)
  {
    // identify functions with result types we might want to replace
    if(it.opcode() == rdcspv::Op::Function || it.opcode() == rdcspv::Op::FunctionParameter ||
       it.opcode() == rdcspv::Op::Variable || it.opcode() == rdcspv::Op::AccessChain ||
       it.opcode() == rdcspv::Op::InBoundsAccessChain || it.opcode() == rdcspv::Op::Bitcast ||
       it.opcode() == rdcspv::Op::Undef || it.opcode() == rdcspv::Op::ExtInst ||
       it.opcode() == rdcspv::Op::FunctionCall || it.opcode() == rdcspv::Op::Phi ||
       it.opcode() == rdcspv::Op::Select)
    {
      editor.PreModify(it);

      rdcspv::Id id = rdcspv::Id::fromWord(it.word(1));
      auto replIt = typeReplacements.find(id);
      if(replIt != typeReplacements.end())
        id = replIt->second;
      it.word(1) = id.value();

      editor.PostModify(it);
    }
  }

  rdcspv::Id entryID;

  std::set<rdcspv::Id> entries;

  for(const rdcspv::EntryPoint &entry : editor.GetEntries())
  {
    if(entry.name == entryName && entry.executionModel == rdcspv::ExecutionModel::Vertex)
      entryID = entry.id;

    entries.insert(entry.id);
  }

  RDCASSERT(entryID);

  // tracks all the global variables we use, for compliance with SPIR-V 1.4.
  rdcarray<rdcspv::Id> globals;

  // we remove all entry points, we'll create one of our own.
  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::EntryPoints),
                   end = editor.End(rdcspv::Section::EntryPoints);
      it < end; ++it)
  {
    rdcspv::OpEntryPoint entry(it);

    // when we find the entry point we're patching, grab its interface for the set of globals. We
    // will be patching any Input/Output variables to private, but from SPIR-V 1.4 the interface
    // needs to include privates as well.
    if(entry.entryPoint == entryID)
      globals = entry.iface;

    editor.Remove(it);
  }

  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::ExecutionMode),
                   end = editor.End(rdcspv::Section::ExecutionMode);
      it < end; ++it)
  {
    // this can also handle ExecutionModeId and we don't care about the difference
    rdcspv::OpExecutionMode execMode(it);

    if(execMode.entryPoint != entryID)
      editor.Remove(it);
  }

  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::DebugNames),
                   end2 = editor.End(rdcspv::Section::DebugNames);
      it < end2; ++it)
  {
    if(it.opcode() == rdcspv::Op::Name)
    {
      rdcspv::OpName name(it);

      if(inputs.find(name.target) != inputs.end() || outputs.find(name.target) != outputs.end())
      {
        editor.Remove(it);
        if(typeReplacements.find(name.target) == typeReplacements.end())
          editor.SetName(name.target, "emulated_" + name.name);
      }

      // remove any OpName for the old entry points
      if(entries.find(name.target) != entries.end())
        editor.Remove(it);

      // remove any OpName for deleted types
      if(typeReplacements.find(name.target) != typeReplacements.end())
        editor.Remove(it);
    }
  }

  rdcspv::StorageClass bufferClass;
  if(storageMode == Binding)
    bufferClass = editor.StorageBufferClass();
  else
    bufferClass = rdcspv::StorageClass::PhysicalStorageBuffer;

  // declare necessary variables per-output, types and constants. We do this last so that we don't
  // add a private pointer that we later try and deduplicate when collapsing output/input pointers
  // to private
  for(uint32_t i = 0; i < numOutputs; i++)
  {
    inputOutputIDs &io = outs[i];

    io.builtin = refl.outputSignature[i].systemValue;

    // constant for this index
    io.indexConst = editor.AddConstantImmediate(i);

    io.variable = patchData.outputs[i].ID;

    // base type - either a scalar or a vector, since matrix outputs are decayed to vectors
    {
      rdcspv::Scalar scalarType = rdcspv::scalar(refl.outputSignature[i].varType);

      io.vec4Type = editor.DeclareType(rdcspv::Vector(scalarType, 4));

      if(refl.outputSignature[i].compCount > 1)
        io.baseType =
            editor.DeclareType(rdcspv::Vector(scalarType, refl.outputSignature[i].compCount));
      else
        io.baseType = editor.DeclareType(scalarType);
    }

    io.ssboPtrType = editor.DeclareType(rdcspv::Pointer(io.baseType, bufferClass));
    io.privatePtrType =
        editor.DeclareType(rdcspv::Pointer(io.baseType, rdcspv::StorageClass::Private));

    RDCASSERT(io.baseType && io.vec4Type && io.indexConst && io.privatePtrType && io.ssboPtrType,
              io.baseType, io.vec4Type, io.indexConst, io.privatePtrType, io.ssboPtrType);
  }

  // repeat for inputs
  for(uint32_t i = 0; i < numInputs; i++)
  {
    inputOutputIDs &io = ins[i];

    io.builtin = refl.inputSignature[i].systemValue;

    // constant for this index
    io.indexConst = editor.AddConstantImmediate(i);

    io.variable = patchData.inputs[i].ID;

    VarType vType = refl.inputSignature[i].varType;

    rdcspv::Scalar scalarType = rdcspv::scalar(vType);

    // 64-bit values are loaded as uvec4 and then packed in pairs, so we need to declare vec4ID as
    // uvec4
    if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
    {
      io.fetchVec4Type = io.vec4Type =
          editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
    }
    else
    {
      io.vec4Type = editor.DeclareType(rdcspv::Vector(scalarType, 4));

      // if the underlying scalar is actually a 16-bit or 8-bit type, fetch as the matching 32-bit
      // type instead
      switch(vType)
      {
        case VarType::Half:
          io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 4));
          break;
        case VarType::SShort:
        case VarType::SByte:
          io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<int32_t>(), 4));
          break;
        case VarType::UShort:
        case VarType::UByte:
          io.fetchVec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
          break;
        default: io.fetchVec4Type = io.vec4Type; break;
      }
    }

    if(refl.inputSignature[i].compCount > 1)
      io.baseType = editor.DeclareType(rdcspv::Vector(scalarType, refl.inputSignature[i].compCount));
    else
      io.baseType = editor.DeclareType(scalarType);

    io.privatePtrType =
        editor.DeclareType(rdcspv::Pointer(io.baseType, rdcspv::StorageClass::Private));

    RDCASSERT(io.baseType && io.vec4Type && io.indexConst && io.privatePtrType, io.baseType,
              io.vec4Type, io.indexConst, io.privatePtrType);
  }

  rdcspv::Id u32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
  rdcspv::Id uvec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));

  rdcspv::Id uvec4StructPtrType;
  rdcspv::Id uintStructPtrType;

  rdcspv::Id arraySize = editor.AddConstantImmediate<uint32_t>(MeshOutputBufferArraySize);

  rdcspv::Id vbuffersVariable, ibufferVariable;

  rdcarray<rdcspv::Id> vbufferSpecConsts;
  rdcarray<rdcspv::Id> vbufferVariables;
  vbufferSpecConsts.resize(MeshOutputBufferArraySize);
  vbufferVariables.resize(MeshOutputBufferArraySize);
  rdcspv::Id ibufferSpecConst;
  rdcspv::Id outputSpecConst;

  {
    rdcspv::Id runtimeArrayID =
        editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), uvec4Type));
    editor.AddDecoration(rdcspv::OpDecorate(
        runtimeArrayID,
        rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t) * 4)));

    rdcspv::Id uvec4StructType =
        editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
    editor.SetName(uvec4StructType, "__rd_uvec4Struct");

    editor.AddDecoration(rdcspv::OpMemberDecorate(
        uvec4StructType, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));

    uvec4StructPtrType = editor.DeclareType(rdcspv::Pointer(uvec4StructType, bufferClass));
    editor.SetName(uvec4StructPtrType, "__rd_uvec4Struct_ptr");

    runtimeArrayID = editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), u32Type));
    editor.AddDecoration(rdcspv::OpDecorate(
        runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));

    rdcspv::Id uintStructType =
        editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));

    editor.SetName(uintStructType, "__rd_uintStruct");

    editor.AddDecoration(rdcspv::OpMemberDecorate(
        uintStructType, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));

    uintStructPtrType = editor.DeclareType(rdcspv::Pointer(uintStructType, bufferClass));
    editor.SetName(uintStructPtrType, "__rd_uintStruct_ptr");

    if(storageMode == Binding)
    {
      editor.DecorateStorageBufferStruct(uvec4StructType);
      editor.DecorateStorageBufferStruct(uintStructType);

      rdcspv::Id structArrayType = editor.AddType(
          rdcspv::OpTypeArray(editor.MakeId(), uvec4StructType,
                              editor.AddConstantImmediate<uint32_t>(MeshOutputBufferArraySize)));
      rdcspv::Id vbuffersType = editor.DeclareType(rdcspv::Pointer(structArrayType, bufferClass));

      vbuffersVariable = editor.MakeId();
      editor.AddVariable(rdcspv::OpVariable(vbuffersType, vbuffersVariable, bufferClass));
      editor.AddDecoration(rdcspv::OpDecorate(
          vbuffersVariable, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
      editor.AddDecoration(rdcspv::OpDecorate(
          vbuffersVariable, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(2)));

      globals.push_back(vbuffersVariable);

      editor.SetName(vbuffersVariable, "__rd_vbuffers");

      if(action->flags & ActionFlags::Indexed)
      {
        rdcspv::Id ibufferType = editor.DeclareType(rdcspv::Pointer(uintStructType, bufferClass));

        ibufferVariable = editor.MakeId();
        editor.AddVariable(rdcspv::OpVariable(ibufferType, ibufferVariable, bufferClass));
        editor.AddDecoration(rdcspv::OpDecorate(
            ibufferVariable, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
        editor.AddDecoration(rdcspv::OpDecorate(
            ibufferVariable, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(1)));

        globals.push_back(ibufferVariable);

        editor.SetName(ibufferVariable, "__rd_ibuffer");
      }
    }
    else
    {
      editor.AddDecoration(rdcspv::OpDecorate(uvec4StructType, rdcspv::Decoration::Block));
      editor.AddDecoration(rdcspv::OpDecorate(uintStructType, rdcspv::Decoration::Block));

      // add the extension
      editor.AddExtension(storageMode == KHR_bda ? "SPV_KHR_physical_storage_buffer"
                                                 : "SPV_EXT_physical_storage_buffer");

      // change the memory model to physical storage buffer 64
      rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
      rdcspv::OpMemoryModel model(it);
      model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
      it = model;

      // add capabilities
      editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);

      if(storageMode == EXT_bda)
        editor.AddCapability(rdcspv::Capability::Int64);

      for(uint32_t i = 0; i <= MeshOutputBufferArraySize + 1; i++)
      {
        rdcspv::Id *dstId = NULL;
        if(i < MeshOutputBufferArraySize)
          dstId = &vbufferSpecConsts[i];
        else if(i == MeshOutputBufferArraySize)
          dstId = &ibufferSpecConst;
        else if(i == MeshOutputBufferArraySize + 1)
          dstId = &outputSpecConst;

        if(!dstId)
          break;

        if(storageMode == KHR_bda)
        {
          rdcspv::Id addressConstantLSB =
              editor.AddSpecConstantImmediate<uint32_t>(0U, baseSpecConstant + i * 2 + 0);
          rdcspv::Id addressConstantMSB =
              editor.AddSpecConstantImmediate<uint32_t>(0U, baseSpecConstant + i * 2 + 1);

          rdcspv::Id uint2 = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));

          *dstId = editor.AddConstant(rdcspv::OpSpecConstantComposite(
              uint2, editor.MakeId(), {addressConstantLSB, addressConstantMSB}));
        }
        else
        {
          *dstId = editor.AddSpecConstantImmediate<uint64_t>(0ULL, baseSpecConstant + i * 2);
        }

        if(i == MeshOutputBufferArraySize)
          editor.SetName(*dstId, "__rd_ibufferConst");
        else
          editor.SetName(*dstId, StringFormat::Fmt("__rd_vbufferConst%u", i));
      }
    }
  }

  rdcspv::Id uvec4PtrType = editor.DeclareType(rdcspv::Pointer(uvec4Type, bufferClass));
  rdcspv::Id uintPtrType = editor.DeclareType(rdcspv::Pointer(u32Type, bufferClass));

  if(numInputs > 0)
  {
    editor.AddCapability(rdcspv::Capability::SampledBuffer);
  }

  rdcspv::Id outBufferVarID;
  rdcspv::Id outputStructPtrType;
  rdcspv::Id numVertsConstID = editor.AddConstantImmediate<uint32_t>(numVerts);
  rdcspv::Id numInstConstID = editor.AddConstantImmediate<uint32_t>(action->numInstances);
  rdcspv::Id numViewsConstID = editor.AddConstantImmediate<uint32_t>(numViews);

  editor.SetName(numVertsConstID, "numVerts");
  editor.SetName(numInstConstID, "numInsts");
  editor.SetName(numViewsConstID, "numViews");

  // declare the output buffer and its type
  {
    rdcarray<rdcspv::Id> members;
    for(uint32_t o = 0; o < numOutputs; o++)
      members.push_back(outs[o].baseType);

    // struct vertex { ... outputs };
    rdcspv::Id vertStructID = editor.DeclareStructType(members);
    editor.SetName(vertStructID, "vertex_struct");

    // vertex vertArray[];
    rdcspv::Id runtimeArrayID =
        editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), vertStructID));
    editor.SetName(runtimeArrayID, "vertex_array");

    uint32_t memberOffset = 0;
    for(uint32_t o = 0; o < numOutputs; o++)
    {
      uint32_t elemSize = RDCMAX(4U, VarTypeByteSize(refl.outputSignature[o].varType));

      uint32_t numComps = refl.outputSignature[o].compCount;

      // ensure member is std430 packed (vec4 alignment for vec3/vec4)
      if(numComps == 2)
        memberOffset = AlignUp(memberOffset, 2U * elemSize);
      else if(numComps > 2)
        memberOffset = AlignUp(memberOffset, 4U * elemSize);

      // apply decoration to each member in the struct with its offset in the struct
      editor.AddDecoration(rdcspv::OpMemberDecorate(
          vertStructID, o, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(memberOffset)));

      memberOffset += elemSize * refl.outputSignature[o].compCount;
    }

    // align to 16 bytes (vec4) since we will almost certainly have
    // a vec4 in the struct somewhere, and even in std430 alignment,
    // the base struct alignment is still the largest base alignment
    // of any member
    bufStride = AlignUp16(memberOffset);

    // struct meshOutput { vertex vertArray[]; };
    rdcspv::Id outputStructID = editor.DeclareStructType({runtimeArrayID});
    editor.SetName(outputStructID, "meshOutput");

    // meshOutput *
    outputStructPtrType = editor.DeclareType(rdcspv::Pointer(outputStructID, bufferClass));
    editor.SetName(outputStructPtrType, "meshOutput_ptr");

    // the array is the only element in the output struct, so
    // it's at offset 0
    editor.AddDecoration(rdcspv::OpMemberDecorate(
        outputStructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));

    // set array stride
    editor.AddDecoration(rdcspv::OpDecorate(
        runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(bufStride)));

    if(storageMode == Binding)
    {
      // meshOutput *outputData;
      outBufferVarID =
          editor.AddVariable(rdcspv::OpVariable(outputStructPtrType, editor.MakeId(), bufferClass));
      editor.SetName(outBufferVarID, "outputData");

      globals.push_back(outBufferVarID);

      editor.DecorateStorageBufferStruct(outputStructID);

      // set binding
      editor.AddDecoration(rdcspv::OpDecorate(
          outBufferVarID, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
      editor.AddDecoration(rdcspv::OpDecorate(
          outBufferVarID, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));
    }
    else
    {
      editor.AddDecoration(rdcspv::OpDecorate(outputStructID, rdcspv::Decoration::Block));
    }
  }

  rdcspv::Id uint32Vec3ID = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 3));
  rdcspv::Id invocationPtr =
      editor.DeclareType(rdcspv::Pointer(uint32Vec3ID, rdcspv::StorageClass::Input));
  rdcspv::Id invocationId = editor.AddVariable(
      rdcspv::OpVariable(invocationPtr, editor.MakeId(), rdcspv::StorageClass::Input));
  editor.AddDecoration(rdcspv::OpDecorate(
      invocationId,
      rdcspv::DecorationParam<rdcspv::Decoration::BuiltIn>(rdcspv::BuiltIn::GlobalInvocationId)));

  globals.push_back(invocationId);

  editor.SetName(invocationId, "rdoc_invocation");

  // make a new entry point that will call the old function, then when it returns extract & write
  // the outputs.
  rdcspv::Id wrapperEntry = editor.MakeId();
  // don't set a debug name, as some drivers get confused when this doesn't match the entry point
  // name :(.
  // editor.SetName(wrapperEntry, "RenderDoc_MeshFetch_Wrapper_Entrypoint");

  // if we're not using all globals, this is only Input variables so only our invocation Id and any
  // builtins we kept
  if(!editor.EntryPointAllGlobals())
  {
    globals = {invocationId};
    for(rdcspv::Id id : builtinKeeps)
      globals.push_back(id);
  }

  // insert the new patched entry point with the globals
  editor.AddOperation(editor.Begin(rdcspv::Section::EntryPoints),
                      rdcspv::OpEntryPoint(rdcspv::ExecutionModel::GLCompute, wrapperEntry,
                                           PatchedMeshOutputEntryPoint, globals));

  // Strip away any execution modes from the original shaders
  for(rdcspv::Iter it = editor.Begin(rdcspv::Section::ExecutionMode);
      it < editor.End(rdcspv::Section::ExecutionMode); ++it)
  {
    if(it.opcode() == rdcspv::Op::ExecutionMode)
    {
      rdcspv::OpExecutionMode execMode(it);

      // We only need to be cautious about what we are stripping for the entry
      // that we are actually translating, the rest aren't used anyways.
      if(execMode.entryPoint == entryID)
      {
        // Let's check to make sure we don't blindly strip away execution modes that
        // might actually have an impact on the behaviour of the shader.
        switch(execMode.mode)
        {
          // these execution modes should be applied to our entry point
          case rdcspv::ExecutionMode::DenormPreserve:
          case rdcspv::ExecutionMode::DenormFlushToZero:
          case rdcspv::ExecutionMode::SignedZeroInfNanPreserve:
          case rdcspv::ExecutionMode::RoundingModeRTE:
          case rdcspv::ExecutionMode::RoundingModeRTZ:
          case rdcspv::ExecutionMode::SubgroupUniformControlFlowKHR:
            editor.AddExecutionMode(rdcspv::OpExecutionMode(
                wrapperEntry, rdcspv::ExecutionModeAndParamData(execMode.mode)));
            break;
          case rdcspv::ExecutionMode::Xfb: break;
          default: RDCERR("Unexpected execution mode");
        }
      }

      editor.Remove(it);
    }
  }

  // Add our compute shader execution mode
  editor.AddExecutionMode(rdcspv::OpExecutionMode(
      wrapperEntry,
      rdcspv::ExecutionModeParam<rdcspv::ExecutionMode::LocalSize>(MeshOutputDispatchWidth, 1, 1)));

  rdcspv::Id zero = editor.AddConstantImmediate<uint32_t>(0);

  rdcspv::MemoryAccessAndParamDatas memoryAccess;

  // add the wrapper function
  {
    rdcspv::OperationList ops;

    rdcspv::Id voidType = editor.DeclareType(rdcspv::scalar<void>());
    rdcspv::Id funcType = editor.DeclareType(rdcspv::FunctionType(voidType, {}));

    ops.add(rdcspv::OpFunction(voidType, wrapperEntry, rdcspv::FunctionControl::None, funcType));

    ops.add(rdcspv::OpLabel(editor.MakeId()));
    {
      // convert the pointers here
      if(storageMode != Binding)
      {
        memoryAccess.setAligned(sizeof(uint32_t));

        if(ibufferSpecConst != rdcspv::Id())
        {
          // if we don't have the struct as a bind, we need to cast it from the pointer. In
          // KHR_buffer_device_address we bitcast since we store it as a uint2
          if(storageMode == KHR_bda)
            ibufferVariable =
                ops.add(rdcspv::OpBitcast(uintStructPtrType, editor.MakeId(), ibufferSpecConst));
          else
            ibufferVariable = ops.add(
                rdcspv::OpConvertUToPtr(uintStructPtrType, editor.MakeId(), ibufferSpecConst));

          editor.SetName(ibufferVariable, "__rd_ibuffer");
        }

        for(size_t s = 0; s < refl.inputSignature.size(); s++)
        {
          uint32_t idx = refl.inputSignature[s].regIndex;

          if(vbufferSpecConsts[idx] != rdcspv::Id() && vbufferVariables[idx] == rdcspv::Id())
          {
            if(storageMode == KHR_bda)
              vbufferVariables[idx] = ops.add(
                  rdcspv::OpBitcast(uvec4StructPtrType, editor.MakeId(), vbufferSpecConsts[idx]));
            else
              vbufferVariables[idx] = ops.add(rdcspv::OpConvertUToPtr(
                  uvec4StructPtrType, editor.MakeId(), vbufferSpecConsts[idx]));

            editor.SetName(vbufferVariables[idx], StringFormat::Fmt("__rd_vbuffers[%u]", idx));
          }
        }

        {
          if(storageMode == KHR_bda)
            outBufferVarID =
                ops.add(rdcspv::OpBitcast(outputStructPtrType, editor.MakeId(), outputSpecConst));
          else
            outBufferVarID = ops.add(
                rdcspv::OpConvertUToPtr(outputStructPtrType, editor.MakeId(), outputSpecConst));

          editor.SetName(outBufferVarID, "__rd_outbuf");
        }
      }

      // uint3 invocationVec = gl_GlobalInvocationID;
      rdcspv::Id invocationVector =
          ops.add(rdcspv::OpLoad(uint32Vec3ID, editor.MakeId(), invocationId));

      // uint invocation = invocationVec.x
      rdcspv::Id uintInvocationID =
          ops.add(rdcspv::OpCompositeExtract(u32Type, editor.MakeId(), invocationVector, {0U}));

      // arraySlotID = uintInvocationID;
      rdcspv::Id arraySlotID = uintInvocationID;

      editor.SetName(uintInvocationID, "arraySlot");

      // uint viewinst = uintInvocationID / numVerts
      rdcspv::Id viewinstID =
          ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), uintInvocationID, numVertsConstID));

      editor.SetName(viewinstID, "viewInstance");

      rdcspv::Id instID =
          ops.add(rdcspv::OpUMod(u32Type, editor.MakeId(), viewinstID, numInstConstID));

      editor.SetName(instID, "instanceID");

      rdcspv::Id viewID =
          ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), viewinstID, numInstConstID));

      editor.SetName(viewID, "viewID");

      // bool inBounds = viewID < numViews;
      rdcspv::Id inBounds = ops.add(rdcspv::OpULessThan(editor.DeclareType(rdcspv::scalar<bool>()),
                                                        editor.MakeId(), viewID, numViewsConstID));

      // if(inBounds) goto continueLabel; else goto killLabel;
      rdcspv::Id killLabel = editor.MakeId();
      rdcspv::Id continueLabel = editor.MakeId();
      ops.add(rdcspv::OpSelectionMerge(killLabel, rdcspv::SelectionControl::None));
      ops.add(rdcspv::OpBranchConditional(inBounds, continueLabel, killLabel));

      // continueLabel:
      ops.add(rdcspv::OpLabel(continueLabel));

      // uint vtx = uintInvocationID % numVerts
      rdcspv::Id vtxID =
          ops.add(rdcspv::OpUMod(u32Type, editor.MakeId(), uintInvocationID, numVertsConstID));
      editor.SetName(vtxID, "vertexID");

      rdcspv::Id vertexIndexID = vtxID;

      // if we're indexing, look up the index buffer. We don't have to apply vertexOffset - it was
      // already applied when we read back and uniq-ified the index buffer.
      if(action->flags & ActionFlags::Indexed)
      {
        rdcspv::Id idxPtr;

        // idxptr = &ibuffer.member0[vertexIndex]
        idxPtr = ops.add(rdcspv::OpAccessChain(uintPtrType, editor.MakeId(), ibufferVariable,
                                               {zero, vertexIndexID}));

        // vertexIndex = *idxptr
        vertexIndexID = ops.add(rdcspv::OpLoad(u32Type, editor.MakeId(), idxPtr, memoryAccess));
      }

      // we use the current value of vertexIndex and instID to look up per-vertex and per-instance
      // attributes. This is because when we fetched the vertex data, we advanced by (in
      // non-indexed draws) vertexOffset, and by instanceOffset, rather than fetching data that's
      // only used as padding skipped over by these offsets.
      rdcspv::Id vertexLookupID = vertexIndexID;
      rdcspv::Id instanceLookupID = instID;

      if(!(action->flags & ActionFlags::Indexed))
      {
        // for non-indexed draws, we manually apply the vertex offset, but here after we used the
        // 0-based one to calculate the array slot
        vertexIndexID =
            ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), vtxID,
                                   editor.AddConstantImmediate<uint32_t>(action->vertexOffset)));
      }
      editor.SetName(vertexIndexID, "vertexIndex");

      // instIndex = inst + instOffset
      rdcspv::Id instIndexID =
          ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), instID,
                                 editor.AddConstantImmediate<uint32_t>(action->instanceOffset)));
      editor.SetName(instIndexID, "instanceIndex");

      rdcspv::Id idxs[64] = {};

      for(size_t i = 0; i < refl.inputSignature.size(); i++)
      {
        VarType vType = refl.inputSignature[i].varType;

        ShaderBuiltin builtin = refl.inputSignature[i].systemValue;
        if(builtin != ShaderBuiltin::Undefined)
        {
          rdcspv::Id valueID;
          CompType compType = CompType::UInt;

          if(builtin == ShaderBuiltin::VertexIndex)
          {
            valueID = vertexIndexID;
            // although for indexed draws we accounted for vertexOffset when looking up fixed
            // function vertex inputs, we still need to apply it to the VertexIndex builtin here.
            if(action->flags & ActionFlags::Indexed)
            {
              valueID = ops.add(
                  rdcspv::OpIAdd(u32Type, editor.MakeId(), valueID,
                                 editor.AddConstantImmediate<uint32_t>(action->vertexOffset)));
            }
          }
          else if(builtin == ShaderBuiltin::InstanceIndex)
          {
            valueID = instIndexID;
          }
          else if(builtin == ShaderBuiltin::MultiViewIndex)
          {
            valueID = viewID;
          }
          else if(builtin == ShaderBuiltin::ViewportIndex)
          {
            valueID = viewID;
          }
          else if(builtin == ShaderBuiltin::BaseVertex)
          {
            if(action->flags & ActionFlags::Indexed)
            {
              valueID = editor.AddConstantImmediate<uint32_t>(action->vertexOffset);
            }
            else
            {
              valueID = editor.AddConstantImmediate<int32_t>(action->baseVertex);
              compType = CompType::SInt;
            }
          }
          else if(builtin == ShaderBuiltin::BaseInstance)
          {
            valueID = editor.AddConstantImmediate<uint32_t>(action->instanceOffset);
          }
          else if(builtin == ShaderBuiltin::DrawIndex)
          {
            valueID = editor.AddConstantImmediate<uint32_t>(action->drawIndex);
          }
          else if(builtin == ShaderBuiltin::SubgroupEqualMask ||
                  builtin == ShaderBuiltin::SubgroupGreaterMask ||
                  builtin == ShaderBuiltin::SubgroupGreaterEqualMask ||
                  builtin == ShaderBuiltin::SubgroupLessMask ||
                  builtin == ShaderBuiltin::SubgroupLessEqualMask ||
                  builtin == ShaderBuiltin::IndexInSubgroup || builtin == ShaderBuiltin::SubgroupSize)
          {
            // subgroup builtins we left alone, these are still builtins
            continue;
          }

          if(valueID)
          {
            rdcspv::Id ptr = ins[i].variable;

            if(!patchData.inputs[i].accessChain.empty())
            {
              // for composite types we need to access chain first
              rdcarray<rdcspv::Id> chain;

              for(uint32_t accessIdx : patchData.inputs[i].accessChain)
              {
                if(idxs[accessIdx] == 0)
                  idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);

                chain.push_back(idxs[accessIdx]);
              }

              ptr = ops.add(rdcspv::OpAccessChain(ins[i].privatePtrType, editor.MakeId(),
                                                  patchData.inputs[i].ID, chain));
            }

            if(VarTypeCompType(vType) == compType)
            {
              ops.add(rdcspv::OpStore(ptr, valueID));
            }
            else
            {
              // assume we can just bitcast
              rdcspv::Id castedValue =
                  ops.add(rdcspv::OpBitcast(ins[i].baseType, editor.MakeId(), valueID));
              ops.add(rdcspv::OpStore(ptr, castedValue));
            }
          }
          else
          {
            RDCERR("Unsupported/unsupported built-in input %s", ToStr(builtin).c_str());
          }
        }
        else
        {
          if(idxs[i] == 0)
            idxs[i] = editor.AddConstantImmediate<uint32_t>((uint32_t)i);

          if(idxs[refl.inputSignature[i].regIndex] == 0)
            idxs[refl.inputSignature[i].regIndex] =
                editor.AddConstantImmediate<uint32_t>(refl.inputSignature[i].regIndex);

          uint32_t location = refl.inputSignature[i].regIndex;

          // idx = vertexIndex
          rdcspv::Id idx = vertexLookupID;

          // maybe idx = instanceIndex / someDivisor
          if(location < instDivisor.size())
          {
            uint32_t divisor = instDivisor[location];

            if(divisor == ~0U)
            {
              // this magic value indicates vertex-rate data
              idx = vertexLookupID;
            }
            else if(divisor == 0)
            {
              // if the divisor is 0, all instances read the first value.
              idx = editor.AddConstantImmediate<uint32_t>(0);
            }
            else if(divisor == 1)
            {
              // if the divisor is 1, it's just regular instancing
              idx = instanceLookupID;
            }
            else
            {
              // otherwise we divide by the divisor
              rdcspv::Id divisorId = editor.AddConstantImmediate<uint32_t>(divisor);
              idx = ops.add(rdcspv::OpUDiv(u32Type, editor.MakeId(), instanceLookupID, divisorId));
            }
          }

          if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
          {
            // since 64-bit values are packed into two uints, we need to multiply the index by two
            idx = ops.add(rdcspv::OpIMul(u32Type, editor.MakeId(), idx,
                                         editor.AddConstantImmediate<uint32_t>(2)));
          }

          rdcspv::Id ptrId;

          // when we're loading from bindings, the vbuffers variable is an array of N structs each
          // containing uvec4[],
          // when we're using buffer device address we have one variable per vbuffer and it's a
          // plain uvec4*

          // uvec4 *vertex = &vbuffers[reg].member0[idx]
          if(storageMode == Binding)
            ptrId =
                ops.add(rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(), vbuffersVariable,
                                              {idxs[refl.inputSignature[i].regIndex], zero, idx}));
          else
            // uvec4 *vertex = &vbufferN.member0[idx]
            ptrId = ops.add(rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(),
                                                  vbufferVariables[refl.inputSignature[i].regIndex],
                                                  {zero, idx}));

          // uvec4 result = *vertex
          rdcspv::Id result =
              ops.add(rdcspv::OpLoad(uvec4Type, editor.MakeId(), ptrId, memoryAccess));

          // if we want this as ivec4 or vec4, bitcast now
          if(ins[i].fetchVec4Type != uvec4Type)
            result = ops.add(rdcspv::OpBitcast(ins[i].fetchVec4Type, editor.MakeId(), result));

          // we always fetch as full 32-bit values, but if the input was declared as a different
          // size (typically ushort or half) then convert here
          if(ins[i].fetchVec4Type != ins[i].vec4Type)
          {
            if(VarTypeCompType(vType) == CompType::Float)
              result = ops.add(rdcspv::OpFConvert(ins[i].vec4Type, editor.MakeId(), result));
            else if(VarTypeCompType(vType) == CompType::UInt)
              result = ops.add(rdcspv::OpUConvert(ins[i].vec4Type, editor.MakeId(), result));
            else
              result = ops.add(rdcspv::OpSConvert(ins[i].vec4Type, editor.MakeId(), result));
          }

          uint32_t comp = Bits::CountTrailingZeroes(uint32_t(refl.inputSignature[i].regChannelMask));

          if(vType == VarType::Double || vType == VarType::ULong || vType == VarType::SLong)
          {
            // since 64-bit values are packed into two uints, we now need to fetch more data and do
            // packing. We can fetch the data unconditionally since it's harmless to read out of the
            // bounds of the buffer

            rdcspv::Id nextidx = ops.add(rdcspv::OpIAdd(u32Type, editor.MakeId(), idx,
                                                        editor.AddConstantImmediate<uint32_t>(1)));

            // uvec4 *vertex = &vbuffers[reg].member0[nextidx]
            if(storageMode == Binding)
              ptrId = ops.add(
                  rdcspv::OpAccessChain(uvec4PtrType, editor.MakeId(), vbuffersVariable,
                                        {idxs[refl.inputSignature[i].regIndex], zero, nextidx}));
            else
              // uvec4 *vertex = &vbufferN.member0[nextidx]
              ptrId = ops.add(rdcspv::OpAccessChain(
                  uvec4PtrType, editor.MakeId(), vbufferVariables[refl.inputSignature[i].regIndex],
                  {zero, nextidx}));
            rdcspv::Id result2 =
                ops.add(rdcspv::OpLoad(uvec4Type, editor.MakeId(), ptrId, memoryAccess));

            rdcspv::Id glsl450 = editor.ImportExtInst("GLSL.std.450");

            rdcspv::Id uvec2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
            rdcspv::Id comps[4] = {};

            for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
            {
              // first extract the uvec2 we want

              // uvec2 packed = result.[xy/zw] / result2.[xy/zw];
              rdcspv::Id packed = ops.add(rdcspv::OpVectorShuffle(
                  uvec2Type, editor.MakeId(), result, result2, {c * 2 + 0, c * 2 + 1}));

              char swizzle[] = "xyzw";

              editor.SetName(packed, StringFormat::Fmt("packed_%c", swizzle[c]));

              if(vType == VarType::Double)
              {
                // double comp = PackDouble2x32(packed);
                comps[c] = ops.add(rdcspv::OpGLSL450(editor.DeclareType(rdcspv::scalar<double>()),
                                                     editor.MakeId(), glsl450,
                                                     rdcspv::GLSLstd450::PackDouble2x32, {packed}));
              }
              else
              {
                rdcspv::Scalar s = (vType == VarType::ULong) ? rdcspv::scalar<uint64_t>()
                                                             : rdcspv::scalar<int64_t>();

                // [u]int64 comp = Bitcast(packed);
                comps[c] = ops.add(rdcspv::OpBitcast(editor.DeclareType(s), editor.MakeId(), packed));
              }
            }

            // if there's only one component it's ready, otherwise construct a vector
            if(refl.inputSignature[i].compCount == 1)
            {
              result = comps[0];
            }
            else
            {
              rdcarray<rdcspv::Id> ids;

              for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
                ids.push_back(comps[c]);

              // baseTypeN value = result.xyz;
              result = ops.add(rdcspv::OpCompositeConstruct(ins[i].baseType, editor.MakeId(), ids));
            }
          }
          else if(refl.inputSignature[i].compCount == 1)
          {
            // for one component, extract x

            // baseType value = result.x;
            result =
                ops.add(rdcspv::OpCompositeExtract(ins[i].baseType, editor.MakeId(), result, {comp}));
          }
          else if(refl.inputSignature[i].compCount != 4)
          {
            // for less than 4 components, extract the sub-vector

            rdcarray<uint32_t> swizzle;

            for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
              swizzle.push_back(c + comp);

            // baseTypeN value = result.xyz;
            result = ops.add(
                rdcspv::OpVectorShuffle(ins[i].baseType, editor.MakeId(), result, result, swizzle));
          }

          // copy the 4 component result directly

          // not a composite type, we can store directly
          if(patchData.inputs[i].accessChain.empty())
          {
            // *global = value
            ops.add(rdcspv::OpStore(ins[i].variable, result));
          }
          else
          {
            // for composite types we need to access chain first
            rdcarray<rdcspv::Id> chain;

            for(uint32_t accessIdx : patchData.inputs[i].accessChain)
            {
              if(idxs[accessIdx] == 0)
                idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);

              chain.push_back(idxs[accessIdx]);
            }

            rdcspv::Id subElement = ops.add(rdcspv::OpAccessChain(
                ins[i].privatePtrType, editor.MakeId(), patchData.inputs[i].ID, chain));

            ops.add(rdcspv::OpStore(subElement, result));
          }
        }
      }

      // real_main();
      ops.add(rdcspv::OpFunctionCall(voidType, editor.MakeId(), entryID));

      for(uint32_t o = 0; o < numOutputs; o++)
      {
        rdcspv::Id loaded;

        // not a structure member or array child, can load directly
        if(patchData.outputs[o].accessChain.empty())
        {
          // type loaded = *globalvar;
          loaded =
              ops.add(rdcspv::OpLoad(outs[o].baseType, editor.MakeId(), patchData.outputs[o].ID));
        }
        else
        {
          // structure member, need to access chain first
          rdcarray<rdcspv::Id> chain;

          for(uint32_t idx : patchData.outputs[o].accessChain)
          {
            if(idxs[idx] == 0)
              idxs[idx] = editor.AddConstantImmediate<uint32_t>(idx);

            chain.push_back(idxs[idx]);
          }

          // type *readPtr = globalvar.globalsub...;
          rdcspv::Id readPtr = ops.add(rdcspv::OpAccessChain(
              outs[o].privatePtrType, editor.MakeId(), patchData.outputs[o].ID, chain));
          // type loaded = *readPtr;
          loaded = ops.add(rdcspv::OpLoad(outs[o].baseType, editor.MakeId(), readPtr));
        }

        // access chain the destination
        rdcspv::Id writePtr;

        // type *writePtr = &outBuffer.verts[arraySlot].outputN
        writePtr = ops.add(rdcspv::OpAccessChain(outs[o].ssboPtrType, editor.MakeId(), outBufferVarID,
                                                 {zero, arraySlotID, outs[o].indexConst}));

        // *writePtr = loaded;
        ops.add(rdcspv::OpStore(writePtr, loaded, memoryAccess));
      }

      // goto killLabel;
      ops.add(rdcspv::OpBranch(killLabel));

      // killLabel:
      ops.add(rdcspv::OpLabel(killLabel));
    }
    ops.add(rdcspv::OpReturn());

    ops.add(rdcspv::OpFunctionEnd());

    editor.AddFunction(ops);
  }
}