ResourceId D3D12Replay::RenderOverlay()

in renderdoc/driver/d3d12/d3d12_overlay.cpp [1006:2539]


ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, DebugOverlay overlay,
                                      uint32_t eventId, const rdcarray<uint32_t> &passEvents)
{
  ID3D12Resource *resource = NULL;

  {
    auto it = m_pDevice->GetResourceList().find(texid);
    if(it != m_pDevice->GetResourceList().end())
      resource = it->second;
  }

  if(resource == NULL)
    return ResourceId();

  RenderOutputSubresource sub = GetRenderOutputSubresource(texid);

  if(sub.slice == ~0U)
  {
    RDCERR("Rendering overlay for %s couldn't find output to get subresource.", ToStr(texid).c_str());
    sub = RenderOutputSubresource(0, 0, 1);
  }

  D3D12MarkerRegion renderoverlay(m_pDevice->GetQueue(),
                                  StringFormat::Fmt("RenderOverlay %d", overlay));

  D3D12_RESOURCE_DESC resourceDesc = resource->GetDesc();

  D3D12_RESOURCE_DESC overlayTexDesc;
  overlayTexDesc.Alignment = 0;
  overlayTexDesc.DepthOrArraySize = resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D
                                        ? resourceDesc.DepthOrArraySize
                                        : 1;
  overlayTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  overlayTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  overlayTexDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
  overlayTexDesc.Height = resourceDesc.Height;
  overlayTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  overlayTexDesc.MipLevels = resourceDesc.MipLevels;
  overlayTexDesc.SampleDesc = resourceDesc.SampleDesc;
  overlayTexDesc.Width = resourceDesc.Width;

  D3D12_HEAP_PROPERTIES heapProps;
  heapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
  heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  heapProps.CreationNodeMask = 1;
  heapProps.VisibleNodeMask = 1;

  D3D12_RESOURCE_DESC currentOverlayDesc;
  RDCEraseEl(currentOverlayDesc);
  if(m_Overlay.Texture)
    currentOverlayDesc = m_Overlay.Texture->GetDesc();

  WrappedID3D12Resource *wrappedCustomRenderTex = (WrappedID3D12Resource *)m_Overlay.Texture;

  // need to recreate backing custom render tex
  if(overlayTexDesc.Width != currentOverlayDesc.Width ||
     overlayTexDesc.Height != currentOverlayDesc.Height ||
     overlayTexDesc.Format != currentOverlayDesc.Format ||
     overlayTexDesc.DepthOrArraySize != currentOverlayDesc.DepthOrArraySize ||
     overlayTexDesc.MipLevels != currentOverlayDesc.MipLevels ||
     overlayTexDesc.SampleDesc.Count != currentOverlayDesc.SampleDesc.Count ||
     overlayTexDesc.SampleDesc.Quality != currentOverlayDesc.SampleDesc.Quality)
  {
    SAFE_RELEASE(m_Overlay.Texture);
    m_Overlay.resourceId = ResourceId();

    ID3D12Resource *customRenderTex = NULL;
    HRESULT hr = m_pDevice->CreateCommittedResource(
        &heapProps, D3D12_HEAP_FLAG_NONE, &overlayTexDesc, D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
        __uuidof(ID3D12Resource), (void **)&customRenderTex);
    if(FAILED(hr))
    {
      RDCERR("Failed to create custom render tex HRESULT: %s", ToStr(hr).c_str());
      return ResourceId();
    }
    wrappedCustomRenderTex = (WrappedID3D12Resource *)customRenderTex;

    customRenderTex->SetName(L"customRenderTex");

    m_Overlay.Texture = wrappedCustomRenderTex;
    m_Overlay.resourceId = wrappedCustomRenderTex->GetResourceID();
  }

  D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;

  ID3D12Resource *renderDepth = NULL;

  D3D12Descriptor dsView = rs.dsv;

  D3D12_RESOURCE_DESC depthTexDesc = {};
  D3D12_DEPTH_STENCIL_VIEW_DESC dsViewDesc = {};
  if(dsView.GetResResourceId() != ResourceId())
  {
    ID3D12Resource *realDepth =
        m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(dsView.GetResResourceId());

    dsViewDesc = dsView.GetDSV();

    depthTexDesc = realDepth->GetDesc();
    depthTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    depthTexDesc.Alignment = 0;

    HRESULT hr = S_OK;

    hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc,
                                            D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                            __uuidof(ID3D12Resource), (void **)&renderDepth);
    if(FAILED(hr))
    {
      RDCERR("Failed to create renderDepth HRESULT: %s", ToStr(hr).c_str());
      return m_Overlay.resourceId;
    }

    renderDepth->SetName(L"Overlay renderDepth");

    ID3D12GraphicsCommandListX *list = m_pDevice->GetNewList();
    if(!list)
      return ResourceId();

    BarrierSet barriers;
    barriers.Configure(realDepth, m_pDevice->GetSubresourceStates(GetResID(realDepth)),
                       BarrierSet::CopySourceAccess);

    barriers.Apply(list);

    list->CopyResource(renderDepth, realDepth);

    barriers.Unapply(list);

    D3D12_RESOURCE_BARRIER b = {};

    b.Transition.pResource = renderDepth;
    b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    b.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;

    // prepare tex resource for writing
    list->ResourceBarrier(1, &b);

    list->Close();
  }

  D3D12_CPU_DESCRIPTOR_HANDLE rtv = GetDebugManager()->GetCPUHandle(OVERLAY_RTV);
  D3D12_RENDER_TARGET_VIEW_DESC rtDesc = {};
  rtDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;

  ID3D12GraphicsCommandListX *list = m_pDevice->GetNewList();
  if(!list)
    return ResourceId();

  // clear all mips and all slices first
  for(UINT mip = 0; mip < overlayTexDesc.MipLevels; mip++)
  {
    SetRTVDesc(rtDesc, overlayTexDesc,
               RenderOutputSubresource(mip, 0, overlayTexDesc.DepthOrArraySize));

    m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);
    FLOAT black[] = {0.0f, 0.0f, 0.0f, 0.0f};
    list->ClearRenderTargetView(rtv, black, 0, NULL);
  }

  SetRTVDesc(rtDesc, overlayTexDesc, sub);
  m_pDevice->CreateRenderTargetView(wrappedCustomRenderTex, &rtDesc, rtv);

  D3D12_CPU_DESCRIPTOR_HANDLE dsv = {};

  if(renderDepth)
  {
    dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);
    m_pDevice->CreateDepthStencilView(
        renderDepth, dsViewDesc.Format == DXGI_FORMAT_UNKNOWN ? NULL : &dsViewDesc, dsv);
  }

  WrappedID3D12PipelineState *pipe = NULL;

  if(rs.pipe != ResourceId())
    pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

  if(overlay == DebugOverlay::NaN || overlay == DebugOverlay::Clipping)
  {
    // just need the basic texture
  }
  else if(overlay == DebugOverlay::Drawcall)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
      pipe->Fill(psoDesc);

      bool dxil =
          psoDesc.MS.BytecodeLength > 0 ||
          DXBC::DXBCContainer::CheckForDXIL(psoDesc.VS.pShaderBytecode, psoDesc.VS.BytecodeLength);

      ID3DBlob *ps =
          m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::HIGHLIGHT, dxil);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;
      psoDesc.DepthStencilState.DepthBoundsTestEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats.RTFormats);
      psoDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      psoDesc.RTVFormats.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_ALIASED;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.5f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      if(!ps)
      {
        m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
                                   MessageSource::UnsupportedConfiguration,
                                   "No DXIL shader available for overlay");
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreatePipeState(psoDesc, &pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rts.resize(1);
      rs.rts[0] = *GetWrapped(rtv);
      RDCEraseEl(rs.dsv);

      for(D3D12_RECT &r : rs.scissors)
        r = {0, 0, 32768, 32768};

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::BackfaceCull)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
      pipe->Fill(psoDesc);

      D3D12_CULL_MODE origCull = psoDesc.RasterizerState.CullMode;
      BOOL origFrontCCW = psoDesc.RasterizerState.FrontCounterClockwise;

      bool dxil =
          psoDesc.MS.BytecodeLength > 0 ||
          DXBC::DXBCContainer::CheckForDXIL(psoDesc.VS.pShaderBytecode, psoDesc.VS.BytecodeLength);

      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::RED, dxil);
      ID3DBlob *green =
          m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::GREEN, dxil);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats.RTFormats);
      psoDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      psoDesc.RTVFormats.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_ALIASED;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.0f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      if(!red || !green)
      {
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
                                   MessageSource::UnsupportedConfiguration,
                                   "No DXIL shader available for overlay");
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreatePipeState(psoDesc, &redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.RasterizerState.CullMode = origCull;
      psoDesc.RasterizerState.FrontCounterClockwise = origFrontCCW;
      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreatePipeState(psoDesc, &greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rts.resize(1);
      rs.rts[0] = *GetWrapped(rtv);
      RDCEraseEl(rs.dsv);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.pipe = GetResID(greenPSO);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::Wireframe)
  {
    if(pipe && pipe->IsGraphics())
    {
      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
      pipe->Fill(psoDesc);

      bool dxil =
          psoDesc.MS.BytecodeLength > 0 ||
          DXBC::DXBCContainer::CheckForDXIL(psoDesc.VS.pShaderBytecode, psoDesc.VS.BytecodeLength);

      ID3DBlob *ps =
          m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::WIREFRAME, dxil);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats.RTFormats);
      psoDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      psoDesc.RTVFormats.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_WIREFRAME;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_ALIASED;

      float wireClearCol[4] = {200.0f / 255.0f, 255.0f / 255.0f, 0.0f / 255.0f, 0.0f};
      list->ClearRenderTargetView(rtv, wireClearCol, 0, NULL);

      list->Close();
      list = NULL;

      if(!ps)
      {
        m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
                                   MessageSource::UnsupportedConfiguration,
                                   "No DXIL shader available for overlay");
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = ps->GetBufferPointer();
      psoDesc.PS.BytecodeLength = ps->GetBufferSize();

      ID3D12PipelineState *pso = NULL;
      HRESULT hr = m_pDevice->CreatePipeState(psoDesc, &pso);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(ps);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(pso);
      rs.rts.resize(1);
      rs.rts[0] = *GetWrapped(rtv);
      RDCEraseEl(rs.dsv);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(pso);
      SAFE_RELEASE(ps);
    }
  }
  else if(overlay == DebugOverlay::ClearBeforePass || overlay == DebugOverlay::ClearBeforeDraw)
  {
    rdcarray<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::ClearBeforeDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      list->Close();
      list = NULL;

      rdcarray<D3D12Descriptor> rts = rs.rts;

      if(overlay == DebugOverlay::ClearBeforePass)
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      list = m_pDevice->GetNewList();
      if(!list)
        return ResourceId();

      for(size_t i = 0; i < rts.size(); i++)
      {
        const D3D12Descriptor &desc = rts[i];

        if(desc.GetResResourceId() != ResourceId())
          Unwrap(list)->ClearRenderTargetView(Unwrap(GetDebugManager()->GetTempDescriptor(desc)),
                                              &clearCol.x, 0, NULL);
      }

      // Try to clear depth as well, to help debug shadow rendering
      if(rs.dsv.GetResResourceId() != ResourceId() && IsDepthFormat(resourceDesc.Format))
      {
        WrappedID3D12PipelineState *origPSO =
            m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);
        if(origPSO && origPSO->IsGraphics())
        {
          D3D12_COMPARISON_FUNC depthFunc = origPSO->graphics->DepthStencilState.DepthFunc;

          // If the depth func is equal or not equal, don't clear at all since the output would be
          // altered in a way that would cause replay to produce mostly incorrect results.
          // Similarly, skip if the depth func is always, as we'd have a 50% chance of guessing the
          // wrong clear value.
          if(depthFunc != D3D12_COMPARISON_FUNC_EQUAL &&
             depthFunc != D3D12_COMPARISON_FUNC_NOT_EQUAL &&
             depthFunc != D3D12_COMPARISON_FUNC_ALWAYS)
          {
            // If the depth func is less or less equal, clear to 1 instead of 0
            bool depthFuncLess = depthFunc == D3D12_COMPARISON_FUNC_LESS ||
                                 depthFunc == D3D12_COMPARISON_FUNC_LESS_EQUAL;
            float depthClear = depthFuncLess ? 1.0f : 0.0f;

            Unwrap(list)->ClearDepthStencilView(Unwrap(GetDebugManager()->GetTempDescriptor(rs.dsv)),
                                                D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
                                                depthClear, 0, 0, NULL);
          }
        }
      }

      list->Close();
      list = NULL;

      for(size_t i = 0; i < events.size(); i++)
      {
        m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw);

        if(overlay == DebugOverlay::ClearBeforePass && i + 1 < events.size())
          m_pDevice->ReplayLog(events[i] + 1, events[i + 1], eReplay_WithoutDraw);
      }
    }
  }
  else if(overlay == DebugOverlay::ViewportScissor)
  {
    if(pipe && pipe->IsGraphics() && !rs.views.empty())
    {
      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
      pipe->Fill(psoDesc);

      bool dxil =
          psoDesc.MS.BytecodeLength > 0 ||
          DXBC::DXBCContainer::CheckForDXIL(psoDesc.VS.pShaderBytecode, psoDesc.VS.BytecodeLength);

      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::RED, dxil);
      ID3DBlob *green =
          m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::GREEN, dxil);

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;

      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;
      RDCEraseEl(psoDesc.RTVFormats.RTFormats);
      psoDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      psoDesc.RTVFormats.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.FrontCounterClockwise = FALSE;
      psoDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      psoDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;
      psoDesc.RasterizerState.LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_ALIASED;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      ID3D12PipelineState *redPSO = NULL;
      HRESULT hr = m_pDevice->CreatePipeState(psoDesc, &redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreatePipeState(psoDesc, &greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(redPSO);
        SAFE_RELEASE(green);
        SAFE_RELEASE(greenPSO);
        return m_Overlay.resourceId;
      }

      list->Close();
      list = NULL;

      D3D12_RECT scissor = {0, 0, 16384, 16384};

      D3D12RenderState prev = rs;

      rs.rts = {*GetWrapped(rtv)};

      for(D3D12_RECT &s : rs.scissors)
        s = scissor;

      rs.pipe = GetResID(redPSO);
      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs.scissors = prev.scissors;

      rs.pipe = GetResID(greenPSO);
      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      list = m_pDevice->GetNewList();
      if(!list)
        return ResourceId();

      rs.ApplyState(m_pDevice, list);

      list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

      D3D12_VIEWPORT viewport = rs.views[0];
      list->RSSetViewports(1, &viewport);

      list->RSSetScissorRects(1, &scissor);

      list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

      list->SetPipelineState(
          m_General.CheckerboardF16Pipe[Log2Floor(overlayTexDesc.SampleDesc.Count)]);

      list->SetGraphicsRootSignature(m_General.CheckerboardRootSig);

      CheckerboardCBuffer pixelData = {0};

      pixelData.BorderWidth = 3;
      pixelData.CheckerSquareDimension = 16.0f;

      // set primary/secondary to the same to 'disable' checkerboard
      pixelData.PrimaryColor = pixelData.SecondaryColor = Vec4f(0.1f, 0.1f, 0.1f, 1.0f);
      pixelData.InnerColor = Vec4f(0.2f, 0.2f, 0.9f, 0.4f);

      // set viewport rect
      pixelData.RectPosition = Vec2f(viewport.TopLeftX, viewport.TopLeftY);
      pixelData.RectSize = Vec2f(viewport.Width, viewport.Height);

      D3D12_GPU_VIRTUAL_ADDRESS viewCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(0, viewCB);

      float factor[4] = {1.0f, 1.0f, 1.0f, 1.0f};
      list->OMSetBlendFactor(factor);

      list->DrawInstanced(3, 1, 0, 0);

      if(rs.scissors.empty())
      {
        viewport = {};
      }
      else
      {
        viewport.TopLeftX = (float)rs.scissors[0].left;
        viewport.TopLeftY = (float)rs.scissors[0].top;
        viewport.Width = (float)(rs.scissors[0].right - rs.scissors[0].left);
        viewport.Height = (float)(rs.scissors[0].bottom - rs.scissors[0].top);
      }
      list->RSSetViewports(1, &viewport);

      // black/white checkered border
      pixelData.PrimaryColor = Vec4f(1.0f, 1.0f, 1.0f, 1.0f);
      pixelData.SecondaryColor = Vec4f(0.0f, 0.0f, 0.0f, 1.0f);

      // nothing at all inside
      pixelData.InnerColor = Vec4f(0.0f, 0.0f, 0.0f, 0.0f);

      // set scissor rect
      pixelData.RectPosition = Vec2f(viewport.TopLeftX, viewport.TopLeftY);
      pixelData.RectSize = Vec2f(viewport.Width, viewport.Height);

      D3D12_GPU_VIRTUAL_ADDRESS scissorCB =
          GetDebugManager()->UploadConstants(&pixelData, sizeof(pixelData));

      list->SetGraphicsRootConstantBufferView(0, scissorCB);

      list->DrawInstanced(3, 1, 0, 0);

      list->Close();
      list = NULL;

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(green);
      SAFE_RELEASE(greenPSO);
    }
  }
  else if(overlay == DebugOverlay::TriangleSizeDraw || overlay == DebugOverlay::TriangleSizePass)
  {
    if(pipe && pipe->IsGraphics())
    {
      SCOPED_TIMER("Triangle size");

      rdcarray<uint32_t> events = passEvents;

      if(overlay == DebugOverlay::TriangleSizeDraw)
        events.clear();

      while(!events.empty())
      {
        const ActionDescription *action = m_pDevice->GetAction(events[0]);

        // remove any non-drawcalls, like the pass boundary.
        if(!(action->flags & (ActionFlags::MeshDispatch | ActionFlags::Drawcall)))
          events.erase(0);
        else
          break;
      }

      events.push_back(eventId);

      if(overlay == DebugOverlay::TriangleSizePass)
      {
        list->Close();
        list = NULL;

        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
      }

      pipe = m_pDevice->GetResourceManager()->GetCurrentAs<WrappedID3D12PipelineState>(rs.pipe);

      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC pipeDesc;
      pipe->Fill(pipeDesc);
      pipeDesc.pRootSignature = GetDebugManager()->GetMeshRootSig();
      pipeDesc.SampleMask = 0xFFFFFFFF;
      pipeDesc.SampleDesc = overlayTexDesc.SampleDesc;
      pipeDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

      RDCEraseEl(pipeDesc.RTVFormats.RTFormats);
      pipeDesc.RTVFormats.NumRenderTargets = 1;
      pipeDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      pipeDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      pipeDesc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      pipeDesc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      pipeDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      D3D12_INPUT_ELEMENT_DESC ia[2] = {};
      ia[0].SemanticName = "pos";
      ia[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].SemanticName = "sec";
      ia[1].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
      ia[1].InputSlot = 1;
      ia[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      pipeDesc.InputLayout.NumElements = 2;
      pipeDesc.InputLayout.pInputElementDescs = ia;

      pipeDesc.VS.BytecodeLength = m_Overlay.MeshVS->GetBufferSize();
      pipeDesc.VS.pShaderBytecode = m_Overlay.MeshVS->GetBufferPointer();
      RDCEraseEl(pipeDesc.HS);
      RDCEraseEl(pipeDesc.DS);
      RDCEraseEl(pipeDesc.AS);
      RDCEraseEl(pipeDesc.MS);
      pipeDesc.GS.BytecodeLength = m_Overlay.TriangleSizeGS->GetBufferSize();
      pipeDesc.GS.pShaderBytecode = m_Overlay.TriangleSizeGS->GetBufferPointer();
      pipeDesc.PS.BytecodeLength = m_Overlay.TriangleSizePS->GetBufferSize();
      pipeDesc.PS.pShaderBytecode = m_Overlay.TriangleSizePS->GetBufferPointer();

      pipeDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;

      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_GREATER)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_GREATER_EQUAL;
      if(pipeDesc.DepthStencilState.DepthFunc == D3D12_COMPARISON_FUNC_LESS)
        pipeDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;

      // enough for all primitive topology types
      ID3D12PipelineState *pipes[D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH + 1] = {};

      MeshVertexCBuffer vertexData = {};
      vertexData.ModelViewProj = Matrix4f::Identity();
      vertexData.SpriteSize = Vec2f();
      vertexData.homogenousInput = 1U;

      D3D12RenderState::SignatureElement vertexElem(eRootCBV, ResourceId(), 0);
      WrappedID3D12Resource::GetResIDFromAddr(
          GetDebugManager()->UploadConstants(&vertexData, sizeof(vertexData)), vertexElem.id,
          vertexElem.offset);

      for(size_t i = 0; i < events.size(); i++)
      {
        D3D12RenderState prevState = rs;

        Vec4f viewport;

        if(!rs.views.empty())
          viewport = Vec4f(rs.views[0].Width, rs.views[0].Height);

        D3D12RenderState::SignatureElement viewportElem(eRootCBV, ResourceId(), 0);
        WrappedID3D12Resource::GetResIDFromAddr(
            GetDebugManager()->UploadConstants(&viewport, sizeof(viewport)), viewportElem.id,
            viewportElem.offset);

        D3D12RenderState::SignatureElement viewportConstElem(eRootConst, ResourceId(), 0);
        viewportConstElem.SetConstants(4, &viewport, 0);

        rs.graphics.rootsig = GetResID(GetDebugManager()->GetMeshRootSig());
        rs.graphics.sigelems = {
            vertexElem,
            viewportElem,
            viewportConstElem,
        };

        rs.rts = {*(D3D12Descriptor *)rtv.ptr};

        if(list == NULL)
          list = m_pDevice->GetNewList();
        if(!list)
          return ResourceId();

        rs.ApplyState(m_pDevice, list);

        const ActionDescription *action = m_pDevice->GetAction(events[i]);

        for(uint32_t inst = 0; action && inst < RDCMAX(1U, action->numInstances); inst++)
        {
          MeshFormat fmt = GetPostVSBuffers(events[i], inst, 0, MeshDataStage::GSOut);
          if(fmt.vertexResourceId == ResourceId())
            fmt = GetPostVSBuffers(events[i], inst, 0, MeshDataStage::VSOut);

          if(fmt.vertexResourceId != ResourceId())
          {
            D3D_PRIMITIVE_TOPOLOGY topo = MakeD3DPrimitiveTopology(fmt.topology);

            // can't show triangle size for points, lines or patch lists
            if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST ||
               topo >= D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST)
              continue;
            else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ ||
                    topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ)
              continue;
            else
              pipeDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

            list->IASetPrimitiveTopology(topo);

            if(pipes[pipeDesc.PrimitiveTopologyType] == NULL)
            {
              HRESULT hr =
                  m_pDevice->CreatePipeState(pipeDesc, &pipes[pipeDesc.PrimitiveTopologyType]);
              RDCASSERTEQUAL(hr, S_OK);
            }

            ID3D12Resource *vb =
                m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.vertexResourceId);

            D3D12_VERTEX_BUFFER_VIEW vbView = {};
            vbView.BufferLocation = vb->GetGPUVirtualAddress() + fmt.vertexByteOffset;
            vbView.StrideInBytes = fmt.vertexByteStride;
            vbView.SizeInBytes = UINT(vb->GetDesc().Width - fmt.vertexByteOffset);

            // second bind is just a dummy, so we don't have to make a shader
            // that doesn't accept the secondary stream
            list->IASetVertexBuffers(0, 1, &vbView);
            list->IASetVertexBuffers(1, 1, &vbView);

            list->SetPipelineState(pipes[pipeDesc.PrimitiveTopologyType]);

            if(fmt.indexByteStride && fmt.indexResourceId != ResourceId())
            {
              ID3D12Resource *ib =
                  m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(fmt.indexResourceId);

              D3D12_INDEX_BUFFER_VIEW view;
              view.BufferLocation = ib->GetGPUVirtualAddress() + fmt.indexByteOffset;
              view.SizeInBytes = UINT(ib->GetDesc().Width - fmt.indexByteOffset);
              view.Format = fmt.indexByteStride == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT;
              list->IASetIndexBuffer(&view);

              list->DrawIndexedInstanced(fmt.numIndices, 1, 0, fmt.baseVertex, 0);
            }
            else
            {
              list->DrawInstanced(fmt.numIndices, 1, 0, 0);
            }
          }
        }

        list->Close();
        list = NULL;

        rs = prevState;

        if(overlay == DebugOverlay::TriangleSizePass)
        {
          m_pDevice->ReplayLog(events[i], events[i], eReplay_OnlyDraw);

          if(i + 1 < events.size())
            m_pDevice->ReplayLog(events[i], events[i + 1], eReplay_WithoutDraw);
        }
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(size_t i = 0; i < ARRAY_COUNT(pipes); i++)
        SAFE_RELEASE(pipes[i]);
    }

    // restore back to normal
    m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::QuadOverdrawPass || overlay == DebugOverlay::QuadOverdrawDraw)
  {
    SCOPED_TIMER("Quad Overdraw");

    rdcarray<uint32_t> events = passEvents;

    if(overlay == DebugOverlay::QuadOverdrawDraw)
      events.clear();

    events.push_back(eventId);

    if(!events.empty())
    {
      if(overlay == DebugOverlay::QuadOverdrawPass)
      {
        list->Close();
        m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
        list = m_pDevice->GetNewList();
        if(!list)
          return ResourceId();
      }

      uint32_t width = uint32_t(RDCMAX(1ULL, overlayTexDesc.Width >> (sub.mip + 1)));
      uint32_t height = RDCMAX(1U, overlayTexDesc.Height >> (sub.mip + 1));

      width = RDCMAX(1U, width);
      height = RDCMAX(1U, height);

      D3D12_RESOURCE_DESC uavTexDesc = {};
      uavTexDesc.Alignment = 0;
      uavTexDesc.DepthOrArraySize = 4;
      uavTexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
      uavTexDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
      uavTexDesc.Format = DXGI_FORMAT_R32_UINT;
      uavTexDesc.Height = height;
      uavTexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
      uavTexDesc.MipLevels = 1;
      uavTexDesc.SampleDesc.Count = 1;
      uavTexDesc.SampleDesc.Quality = 0;
      uavTexDesc.Width = width;

      ID3D12Resource *overdrawTex = NULL;
      HRESULT hr = m_pDevice->CreateCommittedResource(
          &heapProps, D3D12_HEAP_FLAG_NONE, &uavTexDesc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          NULL, __uuidof(ID3D12Resource), (void **)&overdrawTex);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overdrawTex HRESULT: %s", ToStr(hr).c_str());
        list->Close();
        list = NULL;
        return m_Overlay.resourceId;
      }

      m_pDevice->CreateShaderResourceView(overdrawTex, NULL,
                                          GetDebugManager()->GetCPUHandle(OVERDRAW_SRV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetCPUHandle(OVERDRAW_UAV));
      m_pDevice->CreateUnorderedAccessView(overdrawTex, NULL, NULL,
                                           GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV));

      GetDebugManager()->SetDescriptorHeaps(list, true, false);

      UINT zeroes[4] = {0, 0, 0, 0};
      list->ClearUnorderedAccessViewUint(GetDebugManager()->GetGPUHandle(OVERDRAW_UAV),
                                         GetDebugManager()->GetUAVClearHandle(OVERDRAW_UAV),
                                         overdrawTex, zeroes, 0, NULL);
      list->Close();
      list = NULL;

      if(D3D12_Debug_SingleSubmitFlushing())
      {
        m_pDevice->ExecuteLists();
        m_pDevice->FlushLists();
      }

      m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);

      ID3D12Resource *overrideDepth = NULL;

      ResourceId res = rs.GetDSVID();

      ID3D12Resource *curDepth = m_pDevice->GetResourceManager()->GetCurrentAs<ID3D12Resource>(res);
      D3D12_RESOURCE_DESC curDepthDesc = curDepth ? curDepth->GetDesc() : D3D12_RESOURCE_DESC();
      if(curDepthDesc.SampleDesc.Count > 1)
      {
        curDepthDesc.Alignment = 0;
        curDepthDesc.DepthOrArraySize *= (UINT16)curDepthDesc.SampleDesc.Count;
        curDepthDesc.SampleDesc.Count = 1;
        curDepthDesc.SampleDesc.Quality = 0;

        hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &curDepthDesc,
                                                D3D12_RESOURCE_STATE_COMMON, NULL,
                                                __uuidof(ID3D12Resource), (void **)&overrideDepth);
        if(FAILED(hr))
        {
          RDCERR("Failed to create overrideDepth HRESULT: %s", ToStr(hr).c_str());
          return m_Overlay.resourceId;
        }

        dsv = GetDebugManager()->GetCPUHandle(OVERLAY_DSV);

        D3D12_DEPTH_STENCIL_VIEW_DESC viewDesc = rs.dsv.GetDSV();
        viewDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
        viewDesc.Texture2DArray.ArraySize = 1;
        viewDesc.Texture2DArray.FirstArraySlice = 0;
        viewDesc.Texture2DArray.MipSlice = 0;

        m_pDevice->CreateDepthStencilView(overrideDepth, &viewDesc, dsv);
      }

      // declare callback struct here
      D3D12QuadOverdrawCallback cb(m_pDevice, events, overrideDepth, overrideDepth ? curDepth : NULL,
                                   overrideDepth ? ToPortableHandle(dsv) : PortableHandle(),
                                   ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));

      m_pDevice->ReplayLog(events.front(), events.back(), eReplay_Full);

      // resolve pass
      {
        list = m_pDevice->GetNewList();
        if(!list)
          return ResourceId();

        D3D12_RESOURCE_BARRIER overdrawBarriers[2] = {};

        // make sure UAV work is done then prepare for reading in PS
        overdrawBarriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
        overdrawBarriers[0].UAV.pResource = overdrawTex;
        overdrawBarriers[1].Transition.pResource = overdrawTex;
        overdrawBarriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        overdrawBarriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
        overdrawBarriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;

        // issue the barriers so the resolve draw below can read the overdraw texture as an SRV
        list->ResourceBarrier(2, overdrawBarriers);

        list->OMSetRenderTargets(1, &rtv, TRUE, NULL);

        D3D12_VIEWPORT view = {0.0f, 0.0f, (float)resourceDesc.Width, (float)resourceDesc.Height,
                               0.0f, 1.0f};
        list->RSSetViewports(1, &view);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        list->SetPipelineState(m_Overlay.QuadResolvePipe[Log2Floor(overlayTexDesc.SampleDesc.Count)]);

        list->SetGraphicsRootSignature(m_Overlay.QuadResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);

        list->SetGraphicsRootDescriptorTable(0, GetDebugManager()->GetGPUHandle(OVERDRAW_SRV));

        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      for(auto it = cb.m_PipelineCache.begin(); it != cb.m_PipelineCache.end(); ++it)
      {
        SAFE_RELEASE(it->second.pipe);
        SAFE_RELEASE(it->second.sig);
      }

      SAFE_RELEASE(overdrawTex);
      SAFE_RELEASE(overrideDepth);
    }

    if(overlay == DebugOverlay::QuadOverdrawPass)
      m_pDevice->ReplayLog(0, eventId, eReplay_WithoutDraw);
  }
  else if(overlay == DebugOverlay::Depth || overlay == DebugOverlay::Stencil)
  {
    if(pipe && pipe->IsGraphics())
    {
      ID3D12Resource *renderDepthStencil = NULL;
      bool useDepthWriteStencilPass = (overlay == DebugOverlay::Depth) && renderDepth;

      if(useDepthWriteStencilPass)
      {
        useDepthWriteStencilPass = false;
        WrappedID3D12PipelineState::ShaderEntry *wrappedPS = pipe->PS();
        if(wrappedPS)
        {
          ShaderReflection &reflection = pipe->PS()->GetDetails();
          for(SigParameter &output : reflection.outputSignature)
          {
            if(output.systemValue == ShaderBuiltin::DepthOutput)
              useDepthWriteStencilPass = true;
          }
        }
      }

      HRESULT hr;
      DXGI_FORMAT dsFmt = dsViewDesc.Format;
      // the depth overlay uses stencil buffer as a mask for the passing pixels
      DXGI_FORMAT dsNewFmt = dsFmt;
      size_t fmtIndex = ARRAY_COUNT(m_Overlay.DepthCopyPipe);
      size_t sampleIndex = Log2Floor(overlayTexDesc.SampleDesc.Count);
      if(useDepthWriteStencilPass)
      {
        if(dsFmt == DXGI_FORMAT_D32_FLOAT_S8X24_UINT)
          dsNewFmt = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
        else if(dsFmt == DXGI_FORMAT_D24_UNORM_S8_UINT)
          dsNewFmt = DXGI_FORMAT_D24_UNORM_S8_UINT;
        else if(dsFmt == DXGI_FORMAT_D32_FLOAT)
          dsNewFmt = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
        else if(dsFmt == DXGI_FORMAT_D16_UNORM)
          dsNewFmt = DXGI_FORMAT_D24_UNORM_S8_UINT;
        else
          dsNewFmt = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;

        RDCASSERT((dsNewFmt == DXGI_FORMAT_D24_UNORM_S8_UINT) ||
                  (dsNewFmt == DXGI_FORMAT_D32_FLOAT_S8X24_UINT));
        fmtIndex = (dsNewFmt == DXGI_FORMAT_D24_UNORM_S8_UINT) ? 0 : 1;
        if(m_Overlay.DepthResolvePipe[fmtIndex][sampleIndex] == NULL)
        {
          RDCERR("Unhandled depth resolve format : %s", ToStr(dsNewFmt).c_str());
          useDepthWriteStencilPass = false;
        }

        if(m_Overlay.DepthCopyPipe[fmtIndex][sampleIndex] == NULL)
        {
          RDCERR("Unhandled depth copy format : %s", ToStr(dsNewFmt).c_str());
          useDepthWriteStencilPass = false;
        }

        // Currently depth-copy is only supported for Texture2D and Texture2DMS
        if(dsFmt != dsNewFmt)
        {
          if(depthTexDesc.DepthOrArraySize > 1)
            useDepthWriteStencilPass = false;
          if((dsViewDesc.ViewDimension != D3D12_DSV_DIMENSION_TEXTURE2D) &&
             (dsViewDesc.ViewDimension != D3D12_DSV_DIMENSION_TEXTURE2DMS))
            useDepthWriteStencilPass = false;
        }
        if(!useDepthWriteStencilPass)
        {
          RDCWARN("Depth overlay using fallback method instead of stencil mask");
          dsNewFmt = dsFmt;
        }
      }
      if(useDepthWriteStencilPass)
      {
        // copy depth over to a new depth-stencil buffer
        if(dsFmt != dsNewFmt)
        {
          D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
          srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
          if(overlayTexDesc.SampleDesc.Count == 1)
          {
            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
            srvDesc.Texture2D.MipLevels = ~0U;
            srvDesc.Texture2D.MostDetailedMip = 0;
            srvDesc.Texture2D.PlaneSlice = 0;
            srvDesc.Texture2D.ResourceMinLODClamp = 0.0f;
          }
          else
          {
            srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
          }

          srvDesc.Format = DXGI_FORMAT_UNKNOWN;
          switch(dsFmt)
          {
            case DXGI_FORMAT_D32_FLOAT:
            case DXGI_FORMAT_R32_FLOAT:
            case DXGI_FORMAT_R32_TYPELESS: srvDesc.Format = DXGI_FORMAT_R32_FLOAT; break;

            case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
            case DXGI_FORMAT_R32G8X24_TYPELESS:
            case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
            case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
              srvDesc.Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
              break;

            case DXGI_FORMAT_D24_UNORM_S8_UINT:
            case DXGI_FORMAT_R24G8_TYPELESS:
            case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
            case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
              srvDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
              break;

            case DXGI_FORMAT_D16_UNORM:
            case DXGI_FORMAT_R16_TYPELESS: srvDesc.Format = DXGI_FORMAT_R16_UNORM; break;

            default: break;
          }
          if(srvDesc.Format == DXGI_FORMAT_UNKNOWN)
          {
            RDCERR("Unknown Depth overlay format %s", dsFmt);
            SAFE_RELEASE(renderDepth);
            return m_Overlay.resourceId;
          }

          m_pDevice->CreateShaderResourceView(renderDepth, &srvDesc,
                                              GetDebugManager()->GetCPUHandle(DEPTH_COPY_SRV));

          // New depth-stencil texture
          dsFmt = dsNewFmt;
          depthTexDesc.Format = dsFmt;
          hr = m_pDevice->CreateCommittedResource(
              &heapProps, D3D12_HEAP_FLAG_NONE, &depthTexDesc, D3D12_RESOURCE_STATE_DEPTH_WRITE,
              NULL, __uuidof(ID3D12Resource), (void **)&renderDepthStencil);
          if(FAILED(hr))
          {
            RDCERR("Failed to create renderDepthStencil HRESULT: %s", ToStr(hr).c_str());
            SAFE_RELEASE(renderDepth);
            return m_Overlay.resourceId;
          }

          // Copy renderDepth depth data into renderDepthStencil depth data using fullscreen pass
          // the shader writes 0 to the stencil during the copy
          D3D12_RESOURCE_BARRIER b = {};

          b.Transition.pResource = renderDepth;
          b.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
          b.Transition.StateBefore = D3D12_RESOURCE_STATE_DEPTH_WRITE;
          b.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
          list->ResourceBarrier(1, &b);

          D3D12_DEPTH_STENCIL_VIEW_DESC dsNewViewDesc = dsViewDesc;
          dsNewViewDesc.Format = dsFmt;
          m_pDevice->CreateDepthStencilView(renderDepthStencil, &dsNewViewDesc, dsv);

          list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

          D3D12_VIEWPORT view = {0.0f, 0.0f, (float)resourceDesc.Width, (float)resourceDesc.Height,
                                 0.0f, 1.0f};
          list->RSSetViewports(1, &view);

          D3D12_RECT scissor = {0, 0, 16384, 16384};
          list->RSSetScissorRects(1, &scissor);

          list->SetPipelineState(m_Overlay.DepthCopyPipe[fmtIndex][sampleIndex]);
          list->SetGraphicsRootSignature(m_Overlay.DepthCopyResolveRootSig);

          GetDebugManager()->SetDescriptorHeaps(list, true, false);
          list->SetGraphicsRootDescriptorTable(0, GetDebugManager()->GetGPUHandle(DEPTH_COPY_SRV));

          list->OMSetRenderTargets(0, NULL, FALSE, &dsv);

          list->DrawInstanced(3, 1, 0, 0);

          rs.ApplyState(m_pDevice, list);
        }
      }

      D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC psoDesc;
      pipe->Fill(psoDesc);

      bool dxil =
          psoDesc.MS.BytecodeLength > 0 ||
          DXBC::DXBCContainer::CheckForDXIL(psoDesc.VS.pShaderBytecode, psoDesc.VS.BytecodeLength);

      ID3DBlob *red = m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::RED, dxil);
      ID3DBlob *green =
          m_pDevice->GetShaderCache()->MakeFixedColShader(D3D12ShaderCache::GREEN, dxil);

      D3D12_SHADER_BYTECODE originalPS = psoDesc.PS;

      // make sure that if a test is disabled, it shows all
      // pixels passing
      if(!psoDesc.DepthStencilState.DepthEnable)
        psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
      if(!psoDesc.DepthStencilState.StencilEnable)
      {
        psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
        psoDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
      }

      if(useDepthWriteStencilPass)
      {
        // Do not replace shader
        // disable colour write
        psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0x0;
        // Write stencil 0x1 for depth passing pixels
        psoDesc.DepthStencilState.StencilEnable = TRUE;
        psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
        psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
        psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
        psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
        psoDesc.DepthStencilState.FrontFace.StencilReadMask = 0xff;
        psoDesc.DepthStencilState.FrontFace.StencilWriteMask = 0xff;
        psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace;
      }
      else
      {
        psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
        if(overlay == DebugOverlay::Depth)
        {
          psoDesc.DepthStencilState.StencilEnable = FALSE;
          psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
          psoDesc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
        }
        else
        {
          psoDesc.DepthStencilState.DepthEnable = FALSE;
          psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
          psoDesc.DepthStencilState.DepthBoundsTestEnable = FALSE;
        }
      }

      RDCEraseEl(psoDesc.RTVFormats.RTFormats);
      psoDesc.RTVFormats.RTFormats[0] = DXGI_FORMAT_R16G16B16A16_FLOAT;
      psoDesc.RTVFormats.NumRenderTargets = 1;
      psoDesc.SampleMask = ~0U;
      psoDesc.SampleDesc.Count = RDCMAX(1U, psoDesc.SampleDesc.Count);
      psoDesc.BlendState.AlphaToCoverageEnable = FALSE;
      psoDesc.BlendState.IndependentBlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].BlendEnable = FALSE;
      psoDesc.BlendState.RenderTarget[0].LogicOpEnable = FALSE;

      psoDesc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
      psoDesc.RasterizerState.LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_ALIASED;

      float clearColour[] = {0.0f, 0.0f, 0.0f, 0.0f};
      list->ClearRenderTargetView(rtv, clearColour, 0, NULL);

      list->Close();
      list = NULL;

      if(!red || !green)
      {
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        SAFE_RELEASE(renderDepthStencil);
        SAFE_RELEASE(renderDepth);
        m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
                                   MessageSource::UnsupportedConfiguration,
                                   "No DXIL shader available for overlay");
        return m_Overlay.resourceId;
      }

      psoDesc.PS.pShaderBytecode = green->GetBufferPointer();
      psoDesc.PS.BytecodeLength = green->GetBufferSize();

      ID3D12PipelineState *greenPSO = NULL;
      hr = m_pDevice->CreatePipeState(psoDesc, &greenPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        SAFE_RELEASE(renderDepthStencil);
        SAFE_RELEASE(renderDepth);
        return m_Overlay.resourceId;
      }

      ID3D12PipelineState *depthWriteStencilPSO = NULL;
      if(useDepthWriteStencilPass)
      {
        psoDesc.DSVFormat = dsFmt;
        psoDesc.PS = originalPS;

        hr = m_pDevice->CreatePipeState(psoDesc, &depthWriteStencilPSO);
        if(FAILED(hr))
        {
          RDCERR("Failed to create depth write overlay pso HRESULT: %s", ToStr(hr).c_str());
          SAFE_RELEASE(greenPSO);
          SAFE_RELEASE(red);
          SAFE_RELEASE(green);
          SAFE_RELEASE(renderDepthStencil);
          SAFE_RELEASE(renderDepth);
          return m_Overlay.resourceId;
        }
      }

      psoDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      psoDesc.DepthStencilState.DepthEnable = FALSE;
      psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
      psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
      psoDesc.DepthStencilState.StencilEnable = FALSE;
      psoDesc.DepthStencilState.DepthBoundsTestEnable = FALSE;

      psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
      psoDesc.RasterizerState.DepthClipEnable = FALSE;

      psoDesc.PS.pShaderBytecode = red->GetBufferPointer();
      psoDesc.PS.BytecodeLength = red->GetBufferSize();

      ID3D12PipelineState *redPSO = NULL;
      hr = m_pDevice->CreatePipeState(psoDesc, &redPSO);
      if(FAILED(hr))
      {
        RDCERR("Failed to create overlay pso HRESULT: %s", ToStr(hr).c_str());
        SAFE_RELEASE(depthWriteStencilPSO);
        SAFE_RELEASE(greenPSO);
        SAFE_RELEASE(red);
        SAFE_RELEASE(green);
        SAFE_RELEASE(renderDepthStencil);
        SAFE_RELEASE(renderDepth);
        return m_Overlay.resourceId;
      }

      D3D12RenderState prev = rs;

      rs.pipe = GetResID(redPSO);
      rs.rts.resize(1);
      rs.rts[0] = *GetWrapped(rtv);
      if(dsv.ptr)
        rs.dsv = *GetWrapped(dsv);
      else
        RDCEraseEl(rs.dsv);

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      if(useDepthWriteStencilPass)
      {
        rs.stencilRefBack = rs.stencilRefFront = 0x1;
        rs.pipe = GetResID(depthWriteStencilPSO);
      }
      else
      {
        rs.pipe = GetResID(greenPSO);
      }

      if(useDepthWriteStencilPass)
      {
        list = m_pDevice->GetNewList();
        if(!list)
          return ResourceId();
        list->ClearDepthStencilView(dsv, D3D12_CLEAR_FLAG_STENCIL, 0.0f, 0, 0, NULL);
        list->Close();
        list = NULL;
      }

      m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);

      rs = prev;

      if(useDepthWriteStencilPass)
      {
        // Resolve stencil = 0x1 pixels to green
        list = m_pDevice->GetNewList();
        if(!list)
          return ResourceId();

        list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

        D3D12_VIEWPORT view = {0.0f, 0.0f, (float)resourceDesc.Width, (float)resourceDesc.Height,
                               0.0f, 1.0f};
        list->RSSetViewports(1, &view);

        D3D12_RECT scissor = {0, 0, 16384, 16384};
        list->RSSetScissorRects(1, &scissor);

        RDCASSERT((dsFmt == DXGI_FORMAT_D24_UNORM_S8_UINT) ||
                  (dsFmt == DXGI_FORMAT_D32_FLOAT_S8X24_UINT));
        fmtIndex = (dsFmt == DXGI_FORMAT_D24_UNORM_S8_UINT) ? 0 : 1;

        list->SetPipelineState(m_Overlay.DepthResolvePipe[fmtIndex][sampleIndex]);
        list->SetGraphicsRootSignature(m_Overlay.DepthCopyResolveRootSig);

        GetDebugManager()->SetDescriptorHeaps(list, true, false);
        list->SetGraphicsRootDescriptorTable(0, GetDebugManager()->GetGPUHandle(DEPTH_COPY_SRV));

        list->OMSetStencilRef(0x1);
        list->OMSetRenderTargets(1, &rtv, TRUE, &dsv);

        list->DrawInstanced(3, 1, 0, 0);

        list->Close();
        list = NULL;
      }

      m_pDevice->ExecuteLists();
      m_pDevice->FlushLists();

      SAFE_RELEASE(red);
      SAFE_RELEASE(green);
      SAFE_RELEASE(redPSO);
      SAFE_RELEASE(greenPSO);
      SAFE_RELEASE(depthWriteStencilPSO);
      SAFE_RELEASE(renderDepthStencil);
    }
  }
  else
  {
    RDCERR("Unhandled overlay case!");
  }

  if(list)
    list->Close();

  m_pDevice->ExecuteLists();
  m_pDevice->FlushLists();

  SAFE_RELEASE(renderDepth);

  return m_Overlay.resourceId;
}