void AnnotateShader()

in renderdoc/driver/vulkan/vk_shader_feedback.cpp [377:1581]


void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchData, ShaderStage stage,
                    const char *entryName, const std::map<BindKey, BindData> &offsetMap,
                    uint32_t maxSlot, bool usePrimitiveID, VkDeviceAddress addr,
                    bool bufferAddressKHR, bool usesMultiview, rdcarray<uint32_t> &modSpirv,
                    std::map<uint32_t, PrintfData> &printfData)
{
  // calculate offsets for IDs on the original unmodified SPIR-V. The editor may insert some nops,
  // so we do it manually here
  std::map<rdcspv::Id, uint32_t> idToOffset;

  for(rdcspv::Iter it(modSpirv, rdcspv::FirstRealWord); it; it++)
    idToOffset[rdcspv::OpDecoder(it).result] = (uint32_t)it.offs();

  rdcspv::Editor editor(modSpirv);

  editor.Prepare();

  RDCASSERTMSG("SPIR-V module is too large to encode instruction ID!", modSpirv.size() < 0xfffffffU);

  const bool useBufferAddress = (addr != 0);

  const uint32_t targetIndexWidth = useBufferAddress ? sizeof(uintvulkanmax_t) * 8 : 32;

  // store the maximum slot we can use, for clamping outputs to avoid writing out of bounds
  rdcspv::Id maxSlotID = targetIndexWidth == 64 ? editor.AddConstantImmediate<uint64_t>(maxSlot)
                                                : editor.AddConstantImmediate<uint32_t>(maxSlot);

  rdcspv::Id maxPrintfWordOffset =
      editor.AddConstantImmediate<uint32_t>(Vulkan_Debug_PrintfBufferSize() / sizeof(uint32_t));

  rdcspv::Id falsePrintfValue = editor.AddConstantImmediate<uint32_t>(0U);
  rdcspv::Id truePrintfValue = editor.AddConstantImmediate<uint32_t>(1U);

  rdcspv::Id uint32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
  rdcspv::Id int32Type = editor.DeclareType(rdcspv::scalar<int32_t>());
  rdcspv::Id f32Type = editor.DeclareType(rdcspv::scalar<float>());
  rdcspv::Id uint64Type;
  rdcspv::Id uint32StructID;
  rdcspv::Id indexOffsetType;

  // if the module declares int64 capability, or we use it, ensure uint64 is declared in case we
  // need to transform it for printf arguments
  if(editor.HasCapability(rdcspv::Capability::Int64) || targetIndexWidth == 64)
  {
    editor.AddCapability(rdcspv::Capability::Int64);
    uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
  }

  if(useBufferAddress)
  {
    uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {uint32Type}));

    // any function parameters we add are byte offsets
    indexOffsetType = editor.DeclareType(rdcspv::scalar<uintvulkanmax_t>());
  }
  else
  {
    rdcspv::Id runtimeArrayID =
        editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), uint32Type));

    editor.AddDecoration(rdcspv::OpDecorate(
        runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));

    uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));

    // any function parameters we add are uint32 indices
    indexOffsetType = uint32Type;
  }

  editor.SetName(uint32StructID, "__rd_feedbackStruct");

  editor.AddDecoration(rdcspv::OpMemberDecorate(
      uint32StructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));

  // map from variable ID to watch, to variable ID to get offset from (as a SPIR-V constant,
  // or as either byte offset for buffer addressing or ssbo index otherwise)
  std::map<rdcspv::Id, rdcspv::Id> varLookup;

  // iterate over all variables. We do this here because in the absence of the buffer address
  // extension we might declare our own below and patch bindings - so we need to look these up now
  for(const rdcspv::Variable &var : editor.GetGlobals())
  {
    // skip variables without one of these storage classes, as they are not descriptors
    if(var.storage != rdcspv::StorageClass::UniformConstant &&
       var.storage != rdcspv::StorageClass::Uniform &&
       var.storage != rdcspv::StorageClass::StorageBuffer)
      continue;

    // figure out which interface this variable is in to make our key
    BindKey key = {};
    key.stage = refl.stage;

    int32_t idx = -1;
    if((idx = patchData.cblockInterface.indexOf(var.id)) >= 0)
    {
      key.index.category = DescriptorCategory::ConstantBlock;
      key.index.index = (uint32_t)idx;
    }
    else if((idx = patchData.samplerInterface.indexOf(var.id)) >= 0)
    {
      key.index.category = DescriptorCategory::Sampler;
      key.index.index = (uint32_t)idx;
    }
    else if((idx = patchData.roInterface.indexOf(var.id)) >= 0)
    {
      key.index.category = DescriptorCategory::ReadOnlyResource;
      key.index.index = (uint32_t)idx;
    }
    else if((idx = patchData.rwInterface.indexOf(var.id)) >= 0)
    {
      key.index.category = DescriptorCategory::ReadWriteResource;
      key.index.index = (uint32_t)idx;
    }

    // if this is one of the bindings we care about
    auto it = offsetMap.find(key);
    if(it != offsetMap.end())
    {
      // store the offset for this variable so we watch for access chains and know where to store to
      if(useBufferAddress)
      {
        rdcspv::Id id = varLookup[var.id] =
            editor.AddConstantImmediate<uintvulkanmax_t>(uintvulkanmax_t(it->second.offset));

        editor.SetName(
            id, StringFormat::Fmt("__feedbackOffset_%s_%u", ToStr(it->first.index.category).c_str(),
                                  it->first.index.index));
      }
      else
      {
        // check that the offset fits in 32-bit word, convert byte offset to uint32 index
        uint64_t index = it->second.offset / 4;
        RDCASSERT(index < 0xFFFFFFFFULL, it->first.index.category, it->first.index.index,
                  it->second.offset);
        rdcspv::Id id = varLookup[var.id] = editor.AddConstantImmediate<uint32_t>(uint32_t(index));

        editor.SetName(
            id, StringFormat::Fmt("__feedbackOffset_%s_%u", ToStr(it->first.index.category).c_str(),
                                  it->first.index.index));
      }
    }
  }

  rdcspv::Id carryStructType = editor.DeclareStructType({uint32Type, uint32Type});
  rdcspv::Id bufferAddressConst, ssboVar, uint32ptrtype;

  if(usesMultiview &&
     (stage == ShaderStage::Pixel || stage == ShaderStage::Vertex || stage == ShaderStage::Geometry))
  {
    editor.AddCapability(rdcspv::Capability::MultiView);
    editor.AddExtension("SPV_KHR_multiview");
  }

  if(usePrimitiveID && stage == ShaderStage::Fragment && Vulkan_PrintfFetch())
  {
    editor.AddCapability(rdcspv::Capability::Geometry);
  }

  rdcarray<rdcspv::Id> newGlobals;

  if(useBufferAddress)
  {
    // add the extension
    editor.AddExtension(bufferAddressKHR ? "SPV_KHR_physical_storage_buffer"
                                         : "SPV_EXT_physical_storage_buffer");

    // change the memory model to physical storage buffer 64
    rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
    rdcspv::OpMemoryModel model(it);
    model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
    it = model;

    // add capabilities
    editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
    // for simplicity on KHR we always load from uint2 so we're compatible with the case where int64
    // isn't supported
    if(bufferAddressKHR)
    {
      rdcspv::Id addressConstantLSB = editor.AddConstantImmediate<uint32_t>(addr & 0xffffffffu);
      rdcspv::Id addressConstantMSB =
          editor.AddConstantImmediate<uint32_t>((addr >> 32) & 0xffffffffu);

      rdcspv::Id uint2 = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));

      bufferAddressConst = editor.AddConstant(rdcspv::OpConstantComposite(
          uint2, editor.MakeId(), {addressConstantLSB, addressConstantMSB}));
    }
    else
    {
      editor.AddCapability(rdcspv::Capability::Int64);

      // declare the address constants and make our pointers physical storage buffer pointers
      bufferAddressConst = editor.AddConstantImmediate<uint64_t>(addr);
    }

    uint32ptrtype =
        editor.DeclareType(rdcspv::Pointer(uint32Type, rdcspv::StorageClass::PhysicalStorageBuffer));

    editor.SetName(bufferAddressConst, "__rd_feedbackAddress");

    // struct is block decorated
    editor.AddDecoration(rdcspv::OpDecorate(uint32StructID, rdcspv::Decoration::Block));
  }
  else
  {
    rdcspv::StorageClass ssboClass = editor.StorageBufferClass();

    // the pointers are SSBO pointers
    rdcspv::Id bufptrtype = editor.DeclareType(rdcspv::Pointer(uint32StructID, ssboClass));
    uint32ptrtype = editor.DeclareType(rdcspv::Pointer(uint32Type, ssboClass));

    // patch all bindings up by 1
    for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
                     end = editor.End(rdcspv::Section::Annotations);
        it < end; ++it)
    {
      // we will use descriptor set 0 for our own purposes if we don't have a buffer address.
      //
      // Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
      // redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
      // where all descriptor sets are already used. In theory we only have to do this with set 0,
      // but that requires knowing which variables are in set 0 and it's simpler to increase all
      // bindings.
      if(it.opcode() == rdcspv::Op::Decorate)
      {
        rdcspv::OpDecorate dec(it);
        if(dec.decoration == rdcspv::Decoration::Binding)
        {
          RDCASSERT(dec.decoration.binding != 0xffffffff);
          dec.decoration.binding += 1;
          it = dec;
        }
      }
    }

    // add our SSBO variable, at set 0 binding 0
    ssboVar = editor.MakeId();
    editor.AddVariable(rdcspv::OpVariable(bufptrtype, ssboVar, ssboClass));
    editor.AddDecoration(
        rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
    editor.AddDecoration(
        rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));

    if(editor.EntryPointAllGlobals())
      newGlobals.push_back(ssboVar);

    editor.SetName(ssboVar, "__rd_feedbackBuffer");

    editor.DecorateStorageBufferStruct(uint32StructID);
  }

  rdcspv::Id rtarrayOffset = editor.AddConstantImmediate<uint32_t>(0U);
  rdcspv::Id printfArrayOffset = rtarrayOffset;
  rdcspv::Id zero = rtarrayOffset;
  rdcspv::Id usedValue = editor.AddConstantImmediate<uint32_t>(0xFFFFFFFFU);
  rdcspv::Id scope = editor.AddConstantImmediate<uint32_t>((uint32_t)rdcspv::Scope::Invocation);
  rdcspv::Id semantics = editor.AddConstantImmediate<uint32_t>(0U);
  rdcspv::Id uint32shift = editor.AddConstantImmediate<uint32_t>(2U);

  rdcspv::MemoryAccessAndParamDatas memoryAccess;
  memoryAccess.setAligned(sizeof(uint32_t));

  rdcspv::Id printfIncrement;

  if(useBufferAddress)
  {
    printfIncrement = editor.AddConstantImmediate<uintvulkanmax_t>(sizeof(uint32_t));
  }
  else
  {
    printfIncrement = editor.AddConstantImmediate<uint32_t>(1U);
  }

  rdcspv::Id glsl450 = editor.ImportExtInst("GLSL.std.450");

  std::map<rdcspv::Id, rdcspv::Scalar> intTypeLookup;

  for(auto scalarType : editor.GetTypeInfo<rdcspv::Scalar>())
    if(scalarType.first.type == rdcspv::Op::TypeInt)
      intTypeLookup[scalarType.second] = scalarType.first;

  rdcspv::Id entryID;
  for(const rdcspv::EntryPoint &entry : editor.GetEntries())
  {
    if(entry.name == entryName && MakeShaderStage(entry.executionModel) == stage)
    {
      entryID = entry.id;
      break;
    }
  }

  rdcspv::Id uvec2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
  rdcspv::Id uvec3Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 3));
  rdcspv::Id uvec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));

  // we'll initialise this at the start of the entry point, and use it globally to get the location
  // for printf statements
  rdcspv::Id printfLocationVar = editor.MakeId();

  if(Vulkan_PrintfFetch())
  {
    editor.AddVariable(rdcspv::OpVariable(
        editor.DeclareType(rdcspv::Pointer(uvec4Type, rdcspv::StorageClass::Private)),
        printfLocationVar, rdcspv::StorageClass::Private));

    if(editor.EntryPointAllGlobals())
      newGlobals.push_back(printfLocationVar);
  }

  rdcspv::Id shaderStageConstant =
      editor.AddConstantImmediate<uint32_t>(uint32_t(stage) << ShaderStageHeaderBitShift);
  rdcspv::Id int64wordshift = editor.AddConstantImmediate<uint32_t>(32U);

  // build up operations to pull in the location from globals - either existing or ones we add
  rdcspv::OperationList locationGather;

  if(Vulkan_PrintfFetch())
  {
    rdcarray<rdcspv::Id> idxs;

    auto fetchOrAddGlobalInput = [&editor, &idxs, &refl, &patchData, &locationGather, &newGlobals](
                                     const char *name, ShaderBuiltin builtin,
                                     rdcspv::BuiltIn spvBuiltin, rdcspv::Id varType, bool integer) {
      rdcspv::Id ret;

      rdcspv::Id ptrType = editor.DeclareType(rdcspv::Pointer(varType, rdcspv::StorageClass::Input));

      for(size_t i = 0; i < refl.inputSignature.size(); i++)
      {
        if(refl.inputSignature[i].systemValue == builtin)
        {
          rdcspv::Id loadType = varType;
          if(refl.inputSignature[i].varType == VarType::SInt)
          {
            if(refl.inputSignature[i].compCount == 1)
              loadType = editor.DeclareType(rdcspv::scalar<int32_t>());
            else
              loadType = editor.DeclareType(
                  rdcspv::Vector(rdcspv::scalar<int32_t>(), refl.inputSignature[i].compCount));
          }

          if(patchData.inputs[i].accessChain.empty())
          {
            ret =
                locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), patchData.inputs[i].ID));
          }
          else
          {
            rdcarray<rdcspv::Id> chain;

            for(uint32_t accessIdx : patchData.inputs[i].accessChain)
            {
              idxs.resize_for_index(accessIdx);
              if(idxs[accessIdx] == 0)
                idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);

              chain.push_back(idxs[accessIdx]);
            }

            rdcspv::Id subElement = locationGather.add(
                rdcspv::OpAccessChain(ptrType, editor.MakeId(), patchData.inputs[i].ID, chain));

            ret = locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), subElement));
          }

          if(loadType != varType)
            ret = locationGather.add(rdcspv::OpBitcast(varType, editor.MakeId(), ret));
        }
      }

      if(ret == rdcspv::Id())
      {
        rdcspv::Id rdocGlobalVar = editor.AddVariable(
            rdcspv::OpVariable(ptrType, editor.MakeId(), rdcspv::StorageClass::Input));
        editor.AddDecoration(rdcspv::OpDecorate(
            rdocGlobalVar, rdcspv::DecorationParam<rdcspv::Decoration::BuiltIn>(spvBuiltin)));
        // Fragment shader inputs that are signed or unsigned integers, integer vectors, or any
        // double-precision floating-point type must be decorated with Flat.
        if(integer && refl.stage == ShaderStage::Pixel)
          editor.AddDecoration(rdcspv::OpDecorate(rdocGlobalVar, rdcspv::Decoration::Flat));

        newGlobals.push_back(rdocGlobalVar);

        editor.SetName(rdocGlobalVar, name);

        ret = locationGather.add(rdcspv::OpLoad(varType, editor.MakeId(), rdocGlobalVar));
      }

      return ret;
    };

    rdcspv::Id location;

    // the location encoding varies by stage
    if(stage == ShaderStage::Compute)
    {
      // the location for compute is easy, it's just the global invocation
      location = fetchOrAddGlobalInput("rdoc_invocation", ShaderBuiltin::DispatchThreadIndex,
                                       rdcspv::BuiltIn::GlobalInvocationId, uvec3Type, true);

      location = locationGather.add(
          rdcspv::OpVectorShuffle(uvec4Type, editor.MakeId(), location, location, {0, 1, 2, 0}));
    }
    else if(stage == ShaderStage::Task)
    {
      // the location for task shaders is the same
      location = fetchOrAddGlobalInput("rdoc_invocation", ShaderBuiltin::DispatchThreadIndex,
                                       rdcspv::BuiltIn::GlobalInvocationId, uvec3Type, true);

      location = locationGather.add(
          rdcspv::OpVectorShuffle(uvec4Type, editor.MakeId(), location, location, {0, 1, 2, 0}));
    }
    else if(stage == ShaderStage::Mesh)
    {
      // the location for mesh shaders is packed a smidge tighter.
      // we need three 3D locators:
      //   (optional) task group index
      //   mesh group index
      //   local thread index
      //
      // the local index has a compile-time known stride so we can use the linear index, which we
      // can give 16 bits to be very generous (10 bits is a more realistic upper bound)
      //
      // similarly the task group index has a known stride so we can use a linear index for it as
      // well. Giving it 32 bits covers any reasonable use (~26 bits is the max reported at the time
      // of writing)
      //
      // annoyingly this leaves us 48 bits per task group index dimension. That is enough for a
      // linear ID easily but it does not have an easily known stride (for a task shader it depends
      // on the OpEmitMeshTasksEXT dimensions). It's not enough for the worst case in each dimension
      // which some drivers report as [4194304,65535,65535] which requires 22,16,16 bits. Those
      // drivers don't allow a shader to dispatch that many in all dimensions as the product is
      // still constrained.
      //
      // So instead we've just used 4 uints for the location just for the mesh shader. We still have
      // to compress things a little so we put the mesh thread in the upper 16-bits with mesh group
      // z
      rdcspv::Id meshThread =
          fetchOrAddGlobalInput("rdoc_meshThread", ShaderBuiltin::GroupFlatIndex,
                                rdcspv::BuiltIn::LocalInvocationIndex, uint32Type, true);
      rdcspv::Id meshGroup = fetchOrAddGlobalInput("rdoc_meshGroup", ShaderBuiltin::GroupIndex,
                                                   rdcspv::BuiltIn::WorkgroupId, uvec3Type, true);

      // TODO read task ID from payload
      rdcspv::Id taskId = zero;

      rdcspv::Id meshThreadShifted = locationGather.add(rdcspv::OpShiftLeftLogical(
          uint32Type, editor.MakeId(), meshThread, editor.AddConstantImmediate<uint32_t>(16U)));
      rdcspv::Id meshGroupX =
          locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), meshGroup, {0}));
      rdcspv::Id meshGroupY =
          locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), meshGroup, {1}));
      rdcspv::Id meshGroupZ =
          locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), meshGroup, {2}));
      meshGroupZ = locationGather.add(rdcspv::OpBitwiseAnd(
          uint32Type, editor.MakeId(), meshGroupZ, editor.AddConstantImmediate<uint32_t>(0xffff)));
      meshGroupZ = locationGather.add(
          rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(), meshGroupZ, meshThreadShifted));

      location = locationGather.add(rdcspv::OpCompositeConstruct(
          uvec4Type, editor.MakeId(), {meshGroupX, meshGroupY, meshGroupZ, taskId}));
    }
    else if(stage == ShaderStage::Vertex || stage == ShaderStage::Pixel)
    {
      rdcspv::Id view;

      // only search for the view index if the multiview capability is declared, otherwise it's
      // invalid and we just set 0. Valid for both Vertex and Pixel shaders
      if(editor.HasCapability(rdcspv::Capability::MultiView))
      {
        view = fetchOrAddGlobalInput("rdoc_viewIndex", ShaderBuiltin::MultiViewIndex,
                                     rdcspv::BuiltIn::ViewIndex, uint32Type, true);
      }
      else
      {
        view = editor.AddConstantImmediate<uint32_t>(0U);
      }

      if(stage == ShaderStage::Vertex)
      {
        rdcspv::Id vtx = fetchOrAddGlobalInput("rdoc_vertexIndex", ShaderBuiltin::VertexIndex,
                                               rdcspv::BuiltIn::VertexIndex, uint32Type, true);
        rdcspv::Id inst = fetchOrAddGlobalInput("rdoc_instanceIndex", ShaderBuiltin::InstanceIndex,
                                                rdcspv::BuiltIn::InstanceIndex, uint32Type, true);

        location = locationGather.add(
            rdcspv::OpCompositeConstruct(uvec4Type, editor.MakeId(), {vtx, inst, view, zero}));
      }
      else if(stage == ShaderStage::Pixel)
      {
        rdcspv::Id float2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 2));
        rdcspv::Id float4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 4));

        rdcspv::Id coord = fetchOrAddGlobalInput("rdoc_fragCoord", ShaderBuiltin::Position,
                                                 rdcspv::BuiltIn::FragCoord, float4Type, false);

        // grab just the xy
        coord = locationGather.add(
            rdcspv::OpVectorShuffle(float2Type, editor.MakeId(), coord, coord, {0, 1}));

        // convert to int
        coord = locationGather.add(rdcspv::OpConvertFToU(uvec2Type, editor.MakeId(), coord));

        rdcspv::Id x =
            locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {0}));
        rdcspv::Id y =
            locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {1}));

        // shift x up into top 16-bits
        x = locationGather.add(rdcspv::OpShiftLeftLogical(
            uint32Type, editor.MakeId(), x, editor.AddConstantImmediate<uint32_t>(16U)));

        // OR together
        coord = locationGather.add(rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(), x, y));

        rdcspv::Id samp;

        // only grab the sample ID if sample shading is already enabled
        for(size_t i = 0; i < refl.inputSignature.size(); i++)
        {
          if(refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASampleIndex ||
             refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASamplePosition)
          {
            samp = fetchOrAddGlobalInput("rdoc_sampleIndex", ShaderBuiltin::MSAASampleIndex,
                                         rdcspv::BuiltIn::SampleId, uint32Type, true);
          }
        }

        if(samp == rdcspv::Id())
        {
          samp = editor.AddConstantImmediate<uint32_t>(~0U);
        }

        // shift samp up into top 16-bits
        samp = locationGather.add(rdcspv::OpShiftLeftLogical(
            uint32Type, editor.MakeId(), samp, editor.AddConstantImmediate<uint32_t>(16U)));

        // OR samp and view together
        view = locationGather.add(rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(), samp, view));

        rdcspv::Id prim;

        if(usePrimitiveID)
        {
          prim = fetchOrAddGlobalInput("rdoc_primitiveIndex", ShaderBuiltin::PrimitiveIndex,
                                       rdcspv::BuiltIn::PrimitiveId, uint32Type, true);
        }
        else
        {
          prim = editor.AddConstantImmediate<uint32_t>(~0U);
        }

        location = locationGather.add(
            rdcspv::OpCompositeConstruct(uvec4Type, editor.MakeId(), {coord, view, prim, zero}));
      }
    }
    else if(stage == ShaderStage::Geometry)
    {
      rdcspv::Id prim = fetchOrAddGlobalInput("rdoc_primitiveIndex", ShaderBuiltin::PrimitiveIndex,
                                              rdcspv::BuiltIn::PrimitiveId, uint32Type, true);

      rdcspv::Id view;

      // only search for the view index if the multiview capability is declared, otherwise it's
      // invalid and we just set 0. This mirrors the handling for Vertex and Pixel shaders
      if(editor.HasCapability(rdcspv::Capability::MultiView))
      {
        view = fetchOrAddGlobalInput("rdoc_viewIndex", ShaderBuiltin::MultiViewIndex,
                                     rdcspv::BuiltIn::ViewIndex, uint32Type, true);
      }
      else
      {
        view = editor.AddConstantImmediate<uint32_t>(0U);
      }

      location = locationGather.add(
          rdcspv::OpCompositeConstruct(uvec4Type, editor.MakeId(), {prim, view, zero, zero}));
    }
    else
    {
      RDCWARN("No identifier stored for %s stage", ToStr(stage).c_str());
      location = locationGather.add(
          rdcspv::OpCompositeConstruct(uvec4Type, editor.MakeId(), {zero, zero, zero, zero}));
    }

    locationGather.add(rdcspv::OpStore(printfLocationVar, location));
  }

  if(!newGlobals.empty())
  {
    rdcspv::Iter it = editor.GetEntry(entryID);

    RDCASSERT(it.opcode() == rdcspv::Op::EntryPoint);

    rdcspv::OpEntryPoint entry(it);

    editor.Remove(it);

    entry.iface.append(newGlobals);

    editor.AddOperation(it, entry);
  }

  rdcspv::Id debugPrintfSet = editor.HasExtInst("NonSemantic.DebugPrintf");

  rdcspv::TypeToIds<rdcspv::FunctionType> funcTypes = editor.GetTypes<rdcspv::FunctionType>();

  // functions that have been patched with annotation & extra function parameters if needed
  std::set<rdcspv::Id> patchedFunctions;

  // functions we need to patch, with the indices of which parameters have bindings coming along
  // with
  std::map<rdcspv::Id, rdcarray<size_t>> functionPatchQueue;

  // start with the entry point, with no parameters to patch
  functionPatchQueue[entryID] = {};

  // now keep patching functions until we have no more to patch
  while(!functionPatchQueue.empty())
  {
    rdcspv::Id funcId;
    rdcarray<size_t> patchArgIndices;

    {
      auto it = functionPatchQueue.begin();
      funcId = functionPatchQueue.begin()->first;
      patchArgIndices = functionPatchQueue.begin()->second;
      functionPatchQueue.erase(it);

      patchedFunctions.insert(funcId);
    }

    rdcspv::Iter it = editor.GetID(funcId);

    RDCASSERT(it.opcode() == rdcspv::Op::Function);

    if(!patchArgIndices.empty())
    {
      rdcspv::OpFunction func(it);

      // find the function's type declaration, add the necessary arguments, redeclare and patch it
      for(const rdcspv::TypeToId<rdcspv::FunctionType> &funcType : funcTypes)
      {
        if(funcType.second == func.functionType)
        {
          rdcspv::FunctionType patchedFuncType = funcType.first;
          for(size_t i = 0; i < patchArgIndices.size(); i++)
            patchedFuncType.argumentIds.push_back(indexOffsetType);

          rdcspv::Id newFuncTypeID = editor.DeclareType(patchedFuncType);

          // re-fetch the iterator as it might have moved with the type declaration
          it = editor.GetID(funcId);

          // change the declared function type
          func.functionType = newFuncTypeID;

          editor.PreModify(it);

          it = func;

          editor.PostModify(it);

          break;
        }
      }
    }

    ++it;

    // onto the OpFunctionParameters. First allocate IDs for all our new function parameters
    rdcarray<rdcspv::Id> patchedParamIDs;
    for(size_t i = 0; i < patchArgIndices.size(); i++)
      patchedParamIDs.push_back(editor.MakeId());

    size_t argIndex = 0;
    size_t watchIndex = 0;
    while(it.opcode() == rdcspv::Op::FunctionParameter)
    {
      rdcspv::OpFunctionParameter param(it);

      // if this is a parameter we're patching, add it into varLookup
      if(watchIndex < patchArgIndices.size() && patchArgIndices[watchIndex] == argIndex)
      {
        // when we see use of this parameter, patch it using the added parameter
        varLookup[param.result] = patchedParamIDs[watchIndex];
        // watch for the next argument
        watchIndex++;
      }

      argIndex++;
      ++it;
    }

    // we're past the existing function parameters, now declare our new ones
    for(size_t i = 0; i < patchedParamIDs.size(); i++)
    {
      editor.AddOperation(it, rdcspv::OpFunctionParameter(indexOffsetType, patchedParamIDs[i]));
      ++it;
    }

    // continue to the first label so we can insert things at the start of the entry point
    for(; it; ++it)
    {
      if(it.opcode() == rdcspv::Op::Label)
      {
        ++it;
        break;
      }
    }

    // skip past any local variables
    while(it.opcode() == rdcspv::Op::Variable || it.opcode() == rdcspv::Op::Line ||
          it.opcode() == rdcspv::Op::NoLine)
      ++it;

    if(funcId == entryID)
      editor.AddOperations(it, locationGather);

    // now patch accesses in the function body
    for(; it; ++it)
    {
      // finish when we hit the end of the function
      if(it.opcode() == rdcspv::Op::FunctionEnd)
        break;

      // if we see an OpCopyObject, just add it to the map pointing to the same value
      if(it.opcode() == rdcspv::Op::CopyObject)
      {
        rdcspv::OpCopyObject copy(it);

        // is this a var we want to snoop?
        auto varIt = varLookup.find(copy.operand);
        if(varIt != varLookup.end())
        {
          varLookup[copy.result] = varIt->second;
        }
      }

      if(it.opcode() == rdcspv::Op::FunctionCall)
      {
        rdcspv::OpFunctionCall call(it);

        // check if any of the variables being passed are ones we care about. Accumulate the added
        // parameters
        rdcarray<uint32_t> funccall;
        rdcarray<size_t> patchArgs;

        // examine each argument to see if it's one we care about
        for(size_t i = 0; i < call.arguments.size(); i++)
        {
          // if this param is one we're snooping then pass our offset - whether it's a constant or
          // a function argument itself - into the function call
          auto varIt = varLookup.find(call.arguments[i]);
          if(varIt != varLookup.end())
          {
            funccall.push_back(varIt->second.value());
            patchArgs.push_back(i);
          }
        }

        // if we have parameters to patch, replace the function call
        if(!funccall.empty())
        {
          // prepend all the existing words
          for(size_t i = 1; i < it.size(); i++)
            funccall.insert(i - 1, it.word(i));

          rdcspv::Iter oldCall = it;

          // add our patched call afterwards
          it++;
          editor.AddOperation(it, rdcspv::Operation(rdcspv::Op::FunctionCall, funccall));

          // remove the old call
          editor.Remove(oldCall);
        }

        // if this function isn't marked for patching yet, and isn't patched, queue it
        if(functionPatchQueue[call.function].empty() &&
           patchedFunctions.find(call.function) == patchedFunctions.end())
          functionPatchQueue[call.function] = patchArgs;
      }

      if(it.opcode() == rdcspv::Op::ExtInst && Vulkan_PrintfFetch())
      {
        rdcspv::OpExtInst extinst(it);
        // is this a printf extinst?
        if(extinst.set == debugPrintfSet)
        {
          uint32_t printfID = idToOffset[extinst.result];

          rdcspv::Id resultConstant = editor.AddConstantDeferred<uint32_t>(printfID);

          PrintfData &format = printfData[printfID];

          {
            rdcspv::OpString str(editor.GetID(rdcspv::Id::fromWord(extinst.params[0])));
            format.user_format = str.string;
            format.effective_format = PatchFormatString(str.string);
          }

          rdcarray<rdcspv::Id> packetWords;

          // pack all the parameters into uint32s
          for(size_t i = 1; i < extinst.params.size(); i++)
          {
            rdcspv::Id printfparam = rdcspv::Id::fromWord(extinst.params[i]);
            rdcspv::Id type = editor.GetIDType(printfparam);

            rdcspv::Iter typeIt = editor.GetID(type);

            // handle vectors, but no other composites
            uint32_t vecDim = 0;
            if(typeIt.opcode() == rdcspv::Op::TypeVector)
            {
              rdcspv::OpTypeVector vec(typeIt);
              vecDim = vec.componentCount;
              type = vec.componentType;
              typeIt = editor.GetID(type);
            }

            rdcspv::Scalar scalarType(typeIt);

            for(uint32_t comp = 0; comp < RDCMAX(1U, vecDim); comp++)
            {
              rdcspv::Id input = printfparam;

              format.argTypes.push_back(scalarType);

              // if the input is a vector, extract the component we're working on
              if(vecDim > 0)
              {
                input = editor.AddOperation(
                    it, rdcspv::OpCompositeExtract(type, editor.MakeId(), input, {comp}));
                it++;
              }

              // handle ints, floats, and bools
              if(typeIt.opcode() == rdcspv::Op::TypeInt)
              {
                rdcspv::OpTypeInt intType(typeIt);

                rdcspv::Id param = input;

                if(intType.signedness)
                {
                  // extend to 32-bit if needed then bitcast to unsigned
                  if(intType.width < 32)
                  {
                    param = editor.AddOperation(
                        it, rdcspv::OpSConvert(int32Type, editor.MakeId(), param));
                    it++;
                  }

                  param = editor.AddOperation(
                      it, rdcspv::OpBitcast(intType.width == 64 ? uint64Type : uint32Type,
                                            editor.MakeId(), param));
                  it++;
                }
                else
                {
                  // just extend to 32-bit if needed
                  if(intType.width < 32)
                  {
                    param = editor.AddOperation(
                        it, rdcspv::OpSConvert(uint32Type, editor.MakeId(), param));
                    it++;
                  }
                }

                // 64-bit integers we now need to split up the words and add them. Otherwise we have
                // a 32-bit uint to add
                if(intType.width == 64)
                {
                  rdcspv::Id lo = editor.AddOperation(
                      it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), param));
                  it++;

                  rdcspv::Id shifted = editor.AddOperation(
                      it, rdcspv::OpShiftRightLogical(uint64Type, editor.MakeId(), param,
                                                      int64wordshift));
                  it++;

                  rdcspv::Id hi = editor.AddOperation(
                      it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), shifted));
                  it++;

                  packetWords.push_back(lo);
                  packetWords.push_back(hi);
                }
                else
                {
                  packetWords.push_back(param);
                }
              }
              else if(typeIt.opcode() == rdcspv::Op::TypeBool)
              {
                packetWords.push_back(
                    editor.AddOperation(it, rdcspv::OpSelect(uint32Type, editor.MakeId(), input,
                                                             truePrintfValue, falsePrintfValue)));
                it++;
              }
              else if(typeIt.opcode() == rdcspv::Op::TypeFloat)
              {
                rdcspv::OpTypeFloat floatType(typeIt);

                rdcspv::Id param = input;

                // if it's not at least a float, upconvert. We don't convert to doubles since that
                // would require double capability
                if(floatType.width < 32)
                {
                  param =
                      editor.AddOperation(it, rdcspv::OpFConvert(f32Type, editor.MakeId(), param));
                  it++;
                }

                if(floatType.width == 64)
                {
                  // for doubles we use the GLSL unpack operation
                  rdcspv::Id unpacked = editor.AddOperation(
                      it, rdcspv::OpGLSL450(uvec2Type, editor.MakeId(), glsl450,
                                            rdcspv::GLSLstd450::UnpackDouble2x32, {param}));

                  // then extract the components
                  rdcspv::Id lo = editor.AddOperation(
                      it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {0}));
                  it++;

                  rdcspv::Id hi = editor.AddOperation(
                      it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {1}));
                  it++;

                  packetWords.push_back(lo);
                  packetWords.push_back(hi);
                }
                else
                {
                  // otherwise we bitcast to uint32
                  param =
                      editor.AddOperation(it, rdcspv::OpBitcast(uint32Type, editor.MakeId(), param));
                  it++;

                  packetWords.push_back(param);
                }
              }
              else
              {
                RDCERR("Unexpected type of operand to printf %s, ignoring",
                       ToStr(typeIt.opcode()).c_str());
              }
            }
          }

          format.payloadWords = packetWords.size();

          // pack header uint32
          rdcspv::Id header =
              editor.AddOperation(it, rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(),
                                                          shaderStageConstant, resultConstant));
          it++;

          packetWords.insert(0, header);

          // load the location out of the global where we put it
          rdcspv::Id location =
              editor.AddOperation(it, rdcspv::OpLoad(uvec4Type, editor.MakeId(), printfLocationVar));
          it++;

          // extract each component and add it as a new word after the header
          packetWords.insert(
              1, editor.AddOperation(
                     it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {0})));
          it++;
          packetWords.insert(
              2, editor.AddOperation(
                     it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {1})));
          it++;
          packetWords.insert(
              3, editor.AddOperation(
                     it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {2})));
          it++;
          packetWords.insert(
              4, editor.AddOperation(
                     it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {3})));
          it++;

          rdcspv::Id counterptr;

          if(useBufferAddress)
          {
            // make a pointer out of the buffer address
            // uint32_t *bufptr = (uint32_t *)(ptr+0)
            counterptr = MakeOffsettedPointer<uintvulkanmax_t>(
                editor, it, uint32ptrtype, carryStructType, bufferAddressConst, rdcspv::Id());

            it++;
          }
          else
          {
            // accesschain to get the pointer we'll atomic into.
            // accesschain is 0 to access rtarray (first member) then zero for the first array index
            // uint32_t *bufptr = (uint32_t *)&buf.printfWords[ssboindex];
            counterptr =
                editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
                                                              ssboVar, {printfArrayOffset, zero}));
            it++;
          }

          rdcspv::Id packetSize = editor.AddConstantDeferred<uint32_t>((uint32_t)packetWords.size());

          // atomically reserve enough space
          rdcspv::Id idx =
              editor.AddOperation(it, rdcspv::OpAtomicIAdd(uint32Type, editor.MakeId(), counterptr,
                                                           scope, semantics, packetSize));
          it++;

          // clamp to the buffer size so we don't overflow
          idx = editor.AddOperation(
              it, rdcspv::OpGLSL450(uint32Type, editor.MakeId(), glsl450, rdcspv::GLSLstd450::UMin,
                                    {idx, maxPrintfWordOffset}));
          it++;

          if(useBufferAddress)
          {
            // convert to an offset value (upconverting as needed, indexOffsetType is always the
            // largest uint type)
            idx = editor.AddOperation(it, rdcspv::OpUConvert(indexOffsetType, editor.MakeId(), idx));
            it++;

            // the index is in words, so multiply by the increment to get a byte offset
            rdcspv::Id byteOffset = editor.AddOperation(
                it, rdcspv::OpIMul(indexOffsetType, editor.MakeId(), idx, printfIncrement));
            it++;

            for(rdcspv::Id word : packetWords)
            {
              // we pre-increment idx because it starts from 0 but we want to write into words
              // starting from [1] to leave the counter itself alone.
              byteOffset = editor.AddOperation(
                  it, rdcspv::OpIAdd(indexOffsetType, editor.MakeId(), byteOffset, printfIncrement));
              it++;

              rdcspv::Id ptr = MakeOffsettedPointer<uintvulkanmax_t>(
                  editor, it, uint32ptrtype, carryStructType, bufferAddressConst, byteOffset);
              it++;

              editor.AddOperation(it, rdcspv::OpStore(ptr, word, memoryAccess));
              it++;
            }
          }
          else
          {
            for(rdcspv::Id word : packetWords)
            {
              // we pre-increment idx because it starts from 0 but we want to write into words
              // starting from [1] to leave the counter itself alone.
              idx = editor.AddOperation(
                  it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), idx, printfIncrement));
              it++;

              rdcspv::Id ptr =
                  editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
                                                                ssboVar, {printfArrayOffset, idx}));
              it++;

              editor.AddOperation(it, rdcspv::OpStore(ptr, word));
              it++;
            }
          }

          // no it++ here, it will happen implicitly on loop continue
        }
      }

      // if we see an access chain of a variable we're snooping, save out the result
      if(it.opcode() == rdcspv::Op::AccessChain || it.opcode() == rdcspv::Op::InBoundsAccessChain)
      {
        rdcspv::OpAccessChain chain(it);
        chain.op = it.opcode();

        // is this a var we want to snoop?
        auto varIt = varLookup.find(chain.base);
        if(varIt != varLookup.end())
        {
          // multi-dimensional arrays of descriptors is not allowed - however an access chain could
          // be longer than 5 words (1 index). Think of the case of a uniform buffer where the first
          // index goes into the descriptor array, and further indices go inside the uniform buffer
          // members.
          RDCASSERT(chain.indexes.size() >= 1, chain.indexes.size());

          rdcspv::Id index = chain.indexes[0];

          // patch after the access chain
          it++;

          // upcast the index to our target uint size for indexing/offsetting
          {
            rdcspv::Id indexType = editor.GetIDType(index);

            if(indexType == rdcspv::Id())
            {
              RDCERR("Unknown type for ID %u, defaulting to uint32_t", index.value());
              indexType = uint32Type;
            }

            rdcspv::Scalar indexTypeData = rdcspv::scalar<uint32_t>();
            auto indexTypeIt = intTypeLookup.find(indexType);

            if(indexTypeIt != intTypeLookup.end())
            {
              indexTypeData = indexTypeIt->second;
            }
            else
            {
              RDCERR("Unknown index type ID %u, defaulting to uint32_t", indexType.value());
            }

            // if it's signed, bitcast it to unsigned
            if(indexTypeData.signedness)
            {
              indexTypeData.signedness = false;

              index = editor.AddOperation(
                  it, rdcspv::OpBitcast(editor.DeclareType(indexTypeData), editor.MakeId(), index));
              it++;
            }

            // if it's not wide enough, uconvert expand it
            if(indexTypeData.width != targetIndexWidth)
            {
              rdcspv::Id extendedtype =
                  editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
              index =
                  editor.AddOperation(it, rdcspv::OpUConvert(extendedtype, editor.MakeId(), index));
              it++;
            }
          }

          // clamp the index to the maximum slot. If the user is reading out of bounds, don't write
          // out of bounds.
          {
            rdcspv::Id clampedtype =
                editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
            index = editor.AddOperation(
                it, rdcspv::OpGLSL450(clampedtype, editor.MakeId(), glsl450,
                                      rdcspv::GLSLstd450::UMin, {index, maxSlotID}));
            it++;
          }

          rdcspv::Id bufptr;

          if(useBufferAddress)
          {
            // convert the constant embedded device address to a pointer

            // shift the index since this is a byte offset
            // shiftedindex = index << uint32shift
            rdcspv::Id shiftedindex = editor.AddOperation(
                it, rdcspv::OpShiftLeftLogical(indexOffsetType, editor.MakeId(), index, uint32shift));
            it++;

            // add the index on top of that
            // offsetaddr = bindingOffset + shiftedindex
            rdcspv::Id offsetaddr = editor.AddOperation(
                it, rdcspv::OpIAdd(indexOffsetType, editor.MakeId(), varIt->second, shiftedindex));
            it++;

            // make a pointer out of it
            // uint32_t *bufptr = (uint32_t *)(ptr + offsetaddr)
            bufptr = MakeOffsettedPointer<uintvulkanmax_t>(
                editor, it, uint32ptrtype, carryStructType, bufferAddressConst, offsetaddr);
            it++;
          }
          else
          {
            // accesschain into the SSBO, by adding the base offset for this var onto the index

            // add the index to this binding's base index
            // ssboindex = bindingOffset + index
            rdcspv::Id ssboindex = editor.AddOperation(
                it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), index, varIt->second));
            it++;

            // accesschain to get the pointer we'll atomic into.
            // accesschain is 0 to access rtarray (first member) then ssboindex for array index
            // uint32_t *bufptr = (uint32_t *)&buf.rtarray[ssboindex];
            bufptr =
                editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
                                                              ssboVar, {rtarrayOffset, ssboindex}));
            it++;
          }

          // atomically set the uint32 that's pointed to
          editor.AddOperation(it, rdcspv::OpAtomicUMax(uint32Type, editor.MakeId(), bufptr, scope,
                                                       semantics, usedValue));

          // no it++ here, it will happen implicitly on loop continue
        }
      }
    }
  }
}