rdcarray D3D11Replay::PixelHistory()

in renderdoc/driver/d3d11/d3d11_pixelhistory.cpp [287:2547]


rdcarray<PixelModification> D3D11Replay::PixelHistory(rdcarray<EventUsage> events,
                                                      ResourceId target, uint32_t x, uint32_t y,
                                                      const Subresource &sub, CompType typeCast)
{
  rdcarray<PixelModification> history;

  // this function needs a *huge* amount of tidying, refactoring and documenting.

  if(events.empty())
    return history;

  // cache the texture details of the destination texture that we're doing the pixel history on
  TextureShaderDetails details = GetDebugManager()->GetShaderDetails(target, typeCast, true);

  if(details.texFmt == DXGI_FORMAT_UNKNOWN)
    return history;

  uint32_t slice = sub.slice;
  uint32_t mip = sub.mip;
  uint32_t sampleIdx = sub.sample;

  D3D11MarkerRegion historyMarker(StringFormat::Fmt(
      "Doing PixelHistory on %s, (%u,%u) %u, %u, %u over %u events", ToStr(target).c_str(), x, y,
      slice, mip, sampleIdx, (uint32_t)events.size()));

  // Use the given type hint for typeless textures
  details.texFmt = GetTypedFormat(details.texFmt, typeCast);
  details.texFmt = GetNonSRGBFormat(details.texFmt);

  SCOPED_TIMER("D3D11DebugManager::PixelHistory");

  if(sampleIdx > details.sampleCount)
    sampleIdx = 0;

  uint32_t sampleMask = ~0U;
  if(sampleIdx < 32)
    sampleMask = 1U << sampleIdx;

  bool multisampled = (details.sampleCount > 1);

  // sampleIdx used later for deciding subresource to read from, so
  // set it to 0 for the no-sample case (resolved, or never MSAA in the
  // first place).
  if(sampleIdx == ~0U || !multisampled)
    sampleIdx = 0;

  // needed for comparison with viewports
  float xf = (float)x;
  float yf = (float)y;

  RDCDEBUG("Checking Pixel History on %s (%u, %u) with %u possible events", ToStr(target).c_str(),
           x, y, (uint32_t)events.size());

  // these occlusion queries are run with every test possible disabled
  rdcarray<ID3D11Query *> occl;
  occl.reserve(events.size());

  ID3D11Query *testQueries[6] = {0};    // one query for each test we do per-drawcall

  // reserve worst case all events. This is used for Pre value, Post value and
  // # frag overdraw (with & without original shader). It's reused later to retrieve per-fragment
  // post values.
  uint32_t pixstoreSlots = (uint32_t)events.size();

  // We always allocate at least 2048 slots, to allow for pixel history that only touches a couple
  // of events still being able to overdraw many times. The idea being that if we're taking the
  // history over many events, then the events which don't take up any slots or only one will mostly
  // dominate over those that take more than the average. If we only have one or two candidate
  // events then at least 2048 slots gives a huge amount of potential overdraw.
  pixstoreSlots = RDCMAX(pixstoreSlots, 2048U);

  pixstoreSlots *= pixstoreStride;

  // need UAV compatible format, so switch B8G8R8A8 for R8G8B8A8, everything will
  // render as normal and it will just be swizzled (which we were doing manually anyway).
  if(details.texFmt == DXGI_FORMAT_B8G8R8A8_UNORM)
    details.texFmt = DXGI_FORMAT_R8G8B8A8_UNORM;

  // other transformations, B8G8R8X8 also as R8G8B8A8 (alpha will be ignored)
  if(details.texFmt == DXGI_FORMAT_B8G8R8X8_UNORM)
    details.texFmt = DXGI_FORMAT_R8G8B8A8_UNORM;

  // R32G32B32 as R32G32B32A32 (alpha will be ignored)
  if(details.texFmt == DXGI_FORMAT_R32G32B32_FLOAT)
    details.texFmt = DXGI_FORMAT_R32G32B32A32_FLOAT;
  if(details.texFmt == DXGI_FORMAT_R32G32B32_UINT)
    details.texFmt = DXGI_FORMAT_R32G32B32A32_UINT;
  if(details.texFmt == DXGI_FORMAT_R32G32B32_SINT)
    details.texFmt = DXGI_FORMAT_R32G32B32A32_SINT;

  // these formats are only valid for depth textures at which point pixstore doesn't matter, so it
  // can be anything.
  if(details.texFmt == DXGI_FORMAT_R24_UNORM_X8_TYPELESS ||
     details.texFmt == DXGI_FORMAT_X24_TYPELESS_G8_UINT ||
     details.texFmt == DXGI_FORMAT_R24G8_TYPELESS || details.texFmt == DXGI_FORMAT_D24_UNORM_S8_UINT ||

     details.texFmt == DXGI_FORMAT_D16_UNORM ||

     details.texFmt == DXGI_FORMAT_D32_FLOAT ||

     details.texFmt == DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS ||
     details.texFmt == DXGI_FORMAT_X32_TYPELESS_G8X24_UINT ||
     details.texFmt == DXGI_FORMAT_R32G8X24_TYPELESS ||
     details.texFmt == DXGI_FORMAT_D32_FLOAT_S8X24_UINT)
    details.texFmt = DXGI_FORMAT_R32G32B32A32_UINT;

  // define a buffer that we can copy before/after results into with PixelHistoryCopyPixel.
  // We previously used a texture but that doesn't always work - depth and MSAA textures can't use
  // CopySubresourceRegion to copy only one pixel, and copying with a UAV fails because some formats
  // don't support UAV. So instead we expand to float4/uint4/int4 in the UAV and write the full
  // expanded values here.
  D3D11_BUFFER_DESC pixstoreDesc = {
      (pixstoreSlots + 1) * sizeof(Vec4f),
      D3D11_USAGE_DEFAULT,
      D3D11_BIND_UNORDERED_ACCESS,
      0,
      0,
      0,
  };

  ID3D11Buffer *pixstore = NULL, *shadoutStore = NULL, *pixstoreDepth = NULL;
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &pixstore);
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &shadoutStore);
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &pixstoreDepth);

  // we'll only use the first two components of pixstoreDepth but for simplicity we keep it the same
  // size.

  pixstoreDesc.Usage = D3D11_USAGE_STAGING;
  pixstoreDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
  pixstoreDesc.BindFlags = 0;

  ID3D11Buffer *pixstoreReadback = NULL, *shadoutStoreReadback = NULL, *pixstoreDepthReadback = NULL;
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &pixstoreReadback);
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &shadoutStoreReadback);
  m_pDevice->CreateBuffer(&pixstoreDesc, NULL, &pixstoreDepthReadback);

  // we create the UAV as typed

  D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
  uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
  uavDesc.Buffer.NumElements = pixstoreSlots + 1;

  bool floatTex = false, uintTex = false, intTex = false;

  if(IsUIntFormat(details.texFmt))
  {
    uintTex = true;
    uavDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT;
  }
  else if(IsIntFormat(details.texFmt))
  {
    intTex = true;
    uavDesc.Format = DXGI_FORMAT_R32G32B32A32_SINT;
  }
  else
  {
    floatTex = true;
    uavDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
  }

  ID3D11UnorderedAccessView *pixstoreUAV = NULL, *shadoutStoreUAV = NULL, *pixstoreDepthUAV = NULL;
  m_pDevice->CreateUnorderedAccessView(pixstore, &uavDesc, &pixstoreUAV);
  m_pDevice->CreateUnorderedAccessView(shadoutStore, &uavDesc, &shadoutStoreUAV);
  m_pDevice->CreateUnorderedAccessView(pixstoreDepth, &uavDesc, &pixstoreDepthUAV);

  // very wasteful, but we must leave the viewport as is to get correct rasterisation which means
  // same dimensions of render target.
  D3D11_TEXTURE2D_DESC shadoutDesc = {
      details.texWidth,
      details.texHeight,
      1U,
      1U,
      DXGI_FORMAT_R32G32B32A32_FLOAT,
      {details.sampleCount, details.sampleQuality},
      D3D11_USAGE_DEFAULT,
      D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE,
      0,
      0,
  };
  ID3D11Texture2D *shadOutput = NULL;
  m_pDevice->CreateTexture2D(&shadoutDesc, NULL, &shadOutput);

  ID3D11ShaderResourceView *shadOutputSRV = NULL;
  m_pDevice->CreateShaderResourceView(shadOutput, NULL, &shadOutputSRV);

  ID3D11RenderTargetView *shadOutputRTV = NULL;
  m_pDevice->CreateRenderTargetView(shadOutput, NULL, &shadOutputRTV);

  shadoutDesc.Format = DXGI_FORMAT_R32G8X24_TYPELESS;
  shadoutDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
  ID3D11Texture2D *shaddepthOutput = NULL;
  m_pDevice->CreateTexture2D(&shadoutDesc, NULL, &shaddepthOutput);

  ID3D11DepthStencilView *shaddepthOutputDSV = NULL;
  {
    D3D11_DEPTH_STENCIL_VIEW_DESC desc;
    desc.Flags = 0;
    desc.Format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
    desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
    desc.Texture2D.MipSlice = 0;

    if(multisampled)
      desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMS;

    m_pDevice->CreateDepthStencilView(shaddepthOutput, &desc, &shaddepthOutputDSV);
  }

  D3D11_SHADER_RESOURCE_VIEW_DESC copyDepthSRVDesc, copyStencilSRVDesc;
  copyDepthSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
  copyDepthSRVDesc.Texture2D.MipLevels = 1;
  copyDepthSRVDesc.Texture2D.MostDetailedMip = 0;
  copyStencilSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
  copyStencilSRVDesc.Texture2D.MipLevels = 1;
  copyStencilSRVDesc.Texture2D.MostDetailedMip = 0;

  if(multisampled)
    copyDepthSRVDesc.ViewDimension = copyStencilSRVDesc.ViewDimension =
        D3D11_SRV_DIMENSION_TEXTURE2DMS;

  ID3D11ShaderResourceView *shaddepthOutputDepthSRV = NULL, *shaddepthOutputStencilSRV = NULL;

  {
    copyDepthSRVDesc.Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
    m_pDevice->CreateShaderResourceView(shaddepthOutput, &copyDepthSRVDesc, &shaddepthOutputDepthSRV);
    copyDepthSRVDesc.Format = DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;
    m_pDevice->CreateShaderResourceView(shaddepthOutput, &copyDepthSRVDesc,
                                        &shaddepthOutputStencilSRV);
  }

  // depth texture to copy to, as CopySubresourceRegion can't copy single pixels out of a depth
  // buffer, and we can't guarantee that the original depth texture is SRV-compatible to allow
  // single-pixel copies via compute shader.
  //
  // Due to copies having to match formats between source and destination we don't create these
  // textures up front but on demand, and resize up as necessary. We do a whole copy from this, then
  // a CS copy via SRV to UAV to copy into the pixstore (which we do a final copy to for readback).
  // The extra step is necessary as you can Copy to a staging texture but you can't use a CS, which
  // we need for single-pixel depth (and stencil) copy.

  D3D11_TEXTURE2D_DESC depthCopyD24S8Desc = {
      details.texWidth,
      details.texHeight,
      details.texMips,
      details.texArraySize,
      DXGI_FORMAT_R24G8_TYPELESS,
      {details.sampleCount, details.sampleQuality},
      D3D11_USAGE_DEFAULT,
      D3D11_BIND_SHADER_RESOURCE,
      0,
      0,
  };
  ID3D11Texture2D *depthCopyD24S8 = NULL;
  ID3D11ShaderResourceView *depthCopyD24S8_DepthSRV = NULL, *depthCopyD24S8_StencilSRV = NULL;

  D3D11_TEXTURE2D_DESC depthCopyD32S8Desc = depthCopyD24S8Desc;
  depthCopyD32S8Desc.Format = DXGI_FORMAT_R32G8X24_TYPELESS;
  ID3D11Texture2D *depthCopyD32S8 = NULL;
  ID3D11ShaderResourceView *depthCopyD32S8_DepthSRV = NULL, *depthCopyD32S8_StencilSRV = NULL;

  D3D11_TEXTURE2D_DESC depthCopyD32Desc = depthCopyD32S8Desc;
  depthCopyD32Desc.Format = DXGI_FORMAT_R32_TYPELESS;
  ID3D11Texture2D *depthCopyD32 = NULL;
  ID3D11ShaderResourceView *depthCopyD32_DepthSRV = NULL;

  D3D11_TEXTURE2D_DESC depthCopyD16Desc = depthCopyD24S8Desc;
  depthCopyD16Desc.Format = DXGI_FORMAT_R16_TYPELESS;
  ID3D11Texture2D *depthCopyD16 = NULL;
  ID3D11ShaderResourceView *depthCopyD16_DepthSRV = NULL;

  uint32_t srcxyData[8] = {
      x,
      y,
      multisampled ? sampleIdx : mip,
      slice,

      uint32_t(multisampled),
      uint32_t(floatTex),
      uint32_t(uintTex),
      uint32_t(intTex),
  };

  uint32_t shadoutsrcxyData[8];
  memcpy(shadoutsrcxyData, srcxyData, sizeof(srcxyData));

  // shadout texture doesn't have slices/mips, just one of the right dimension
  shadoutsrcxyData[2] = multisampled ? sampleIdx : 0;
  shadoutsrcxyData[3] = 0;

  ID3D11Buffer *srcxyCBuf = GetDebugManager()->MakeCBuffer(sizeof(srcxyData));
  ID3D11Buffer *shadoutsrcxyCBuf = GetDebugManager()->MakeCBuffer(sizeof(shadoutsrcxyData));
  ID3D11Buffer *storeCBuf = GetDebugManager()->MakeCBuffer(sizeof(srcxyData));

  GetDebugManager()->FillCBuffer(srcxyCBuf, srcxyData, sizeof(srcxyData));
  GetDebugManager()->FillCBuffer(shadoutsrcxyCBuf, shadoutsrcxyData, sizeof(shadoutsrcxyData));

  // so we do:
  // per sample: orig depth --copy--> depthCopyXXX (created/upsized on demand) --CS pixel copy-->
  // pixstoreDepth
  // at end: pixstoreDepth --copy--> pixstoreDepthReadback
  //
  // The first copy is only needed if the original depth texture is not SRV-able.
  // The CS pixel copy is needed since it's the only way to copy a single pixel out of a depth
  // texture - CopySubresourceRegion can't copy a sub-box of a depth texture. It is also required
  // in the MSAA case to read a specific pixel/sample out.
  //
  // final copy is needed to get data into a readback buffer since we can't have CS writing to
  // staging buffer
  //
  // for colour it's simple, it's just
  // per sample: orig color --copy--> pixstore
  // at end: pixstore --copy--> pixstoreReadback
  //
  // this is slightly redundant but it only adds one extra copy at the end and an extra target, and
  // allows to handle MSAA source textures (which can't copy direct to a staging texture)

  ID3D11Resource *targetres = NULL;

  if(WrappedID3D11Texture1D::m_TextureList.find(target) != WrappedID3D11Texture1D::m_TextureList.end())
    targetres = WrappedID3D11Texture1D::m_TextureList[target].m_Texture;
  else if(WrappedID3D11Texture2D1::m_TextureList.find(target) !=
          WrappedID3D11Texture2D1::m_TextureList.end())
    targetres = WrappedID3D11Texture2D1::m_TextureList[target].m_Texture;
  else if(WrappedID3D11Texture3D1::m_TextureList.find(target) !=
          WrappedID3D11Texture3D1::m_TextureList.end())
    targetres = WrappedID3D11Texture3D1::m_TextureList[target].m_Texture;

  D3D11CopyPixelParams colourCopyParams = {};

  // common parameters
  colourCopyParams.multisampled = multisampled;
  colourCopyParams.floatTex = floatTex;
  colourCopyParams.uintTex = uintTex;
  colourCopyParams.intTex = intTex;
  colourCopyParams.srcxyCBuf = srcxyCBuf;
  colourCopyParams.storeCBuf = storeCBuf;
  if(details.texType == eTexType_3D)
    colourCopyParams.subres = mip;
  else
    colourCopyParams.subres = details.texArraySize * slice + mip;

  D3D11CopyPixelParams depthCopyParams = colourCopyParams;

  colourCopyParams.depthcopy = false;
  colourCopyParams.sourceTex = (ID3D11Texture2D *)targetres;
  colourCopyParams.srvTex = (ID3D11Texture2D *)details.srvResource;
  colourCopyParams.srv[0] = details.srv[details.texType];
  colourCopyParams.srv[1] = NULL;
  colourCopyParams.uav = pixstoreUAV;

  depthCopyParams.depthcopy = true;
  depthCopyParams.uav = pixstoreDepthUAV;

  // while issuing the above queries we can check to see which tests are enabled so we don't
  // bother checking if depth testing failed if the depth test was disabled
  rdcarray<uint32_t> flags;
  flags.resize(events.size());
  std::map<uint32_t, D3D11_COMPARISON_FUNC> depthOps;
  std::map<uint32_t, DXGI_FORMAT> depthFormats;
  enum
  {
    TestEnabled_BackfaceCulling = 1 << 0,
    TestEnabled_DepthClip = 1 << 1,
    TestEnabled_Scissor = 1 << 2,
    TestEnabled_DepthTesting = 1 << 3,
    TestEnabled_StencilTesting = 1 << 4,

    // important to know if blending is enabled or not as we currently skip a bunch of stuff
    // and only pay attention to the final passing fragment if blending is off
    Blending_Enabled = 1 << 5,

    // additional flags we can trivially detect on the CPU for edge cases
    TestMustFail_Scissor =
        1 << 6,    // if the scissor is enabled, pixel lies outside all regions (could be only one)
    TestMustPass_Scissor =
        1 << 7,    // if the scissor is enabled, pixel lies inside all regions (could be only one)
    TestMustFail_DepthTesting = 1 << 8,      // if the comparison func is NEVER
    TestMustFail_StencilTesting = 1 << 9,    // if the comparison func is NEVER for both faces, or
                                             // one face is backface culled and the other is NEVER

    // if the sample mask set at this event doesn't have the right bit set
    TestMustFail_SampleMask = 1 << 10,

    // if predication was failing at this event
    Predication_Failed = 1 << 11,
  };

#if 1
  BOOL occlData = 0;
  const D3D11_QUERY_DESC occlDesc = {D3D11_QUERY_OCCLUSION_PREDICATE, 0};
#else
  UINT64 occlData = 0;
  const D3D11_QUERY_DESC occlDesc = {D3D11_QUERY_OCCLUSION, 0};
#endif

  HRESULT hr = S_OK;

  for(size_t i = 0; i < events.size(); i++)
  {
    ID3D11Query *q = NULL;
    m_pDevice->CreateQuery(&occlDesc, &q);
    occl.push_back(q);
  }

  for(size_t i = 0; i < ARRAY_COUNT(testQueries); i++)
    m_pDevice->CreateQuery(&occlDesc, &testQueries[i]);

  //////////////////////////////////////////////////////////////////
  // Check that everything we need has successfully created.
  // We free everything together at the end

  bool allCreated = true;

  for(size_t i = 0; i < ARRAY_COUNT(testQueries); i++)
  {
    if(!testQueries[i])
    {
      RDCERR("Failed to create test query %d", i);
      allCreated = false;
    }
  }

  if(!pixstore || !pixstoreUAV || !pixstoreReadback)
  {
    RDCERR("Failed to create pixstore (%p %p %p) (%u slots @ fmt %u)", pixstore, pixstoreUAV,
           pixstoreReadback, pixstoreSlots, details.texFmt);
    allCreated = false;
  }

  if(!pixstoreDepth || !pixstoreDepthUAV || !pixstoreDepthReadback)
  {
    RDCERR("Failed to create pixstoreDepth (%p %p %p) (%u slots @ fmt %u)", pixstoreDepth,
           pixstoreDepthUAV, pixstoreDepthReadback, pixstoreSlots, details.texFmt);
    allCreated = false;
  }

  if(!shadoutStore || !shadoutStoreUAV || !shadoutStoreReadback)
  {
    RDCERR("Failed to create shadoutStore (%p %p %p) (%u slots @ fmt %u)", shadoutStore,
           shadoutStoreUAV, shadoutStoreReadback, pixstoreSlots, details.texFmt);
    allCreated = false;
  }

  if(!shadOutput || !shadOutputSRV || !shadOutputRTV)
  {
    RDCERR("Failed to create shadOutput (%p %p %p) (%ux%u [%u,%u] @ fmt %u)", shadOutput,
           shadOutputSRV, shadOutputRTV, details.texWidth, details.texHeight, details.sampleCount,
           details.sampleQuality, details.texFmt);
    allCreated = false;
  }

  if(!shaddepthOutput || !shaddepthOutputDSV || !shaddepthOutputDepthSRV || !shaddepthOutputStencilSRV)
  {
    RDCERR("Failed to create shaddepthOutput (%p %p %p %p) (%ux%u [%u,%u] @ fmt %u)", shaddepthOutput,
           shaddepthOutputDSV, shaddepthOutputDepthSRV, shaddepthOutputStencilSRV, details.texWidth,
           details.texHeight, details.sampleCount, details.sampleQuality, details.texFmt);
    allCreated = false;
  }

  if(!srcxyCBuf || !storeCBuf)
  {
    RDCERR("Failed to create cbuffers (%p %p)", srcxyCBuf, storeCBuf);
    allCreated = false;
  }

  if(!allCreated)
  {
    for(size_t i = 0; i < ARRAY_COUNT(testQueries); i++)
      SAFE_RELEASE(testQueries[i]);

    SAFE_RELEASE(pixstore);
    SAFE_RELEASE(shadoutStore);
    SAFE_RELEASE(pixstoreDepth);

    SAFE_RELEASE(pixstoreReadback);
    SAFE_RELEASE(shadoutStoreReadback);
    SAFE_RELEASE(pixstoreDepthReadback);

    SAFE_RELEASE(pixstoreUAV);
    SAFE_RELEASE(shadoutStoreUAV);
    SAFE_RELEASE(pixstoreDepthUAV);

    SAFE_RELEASE(shadOutput);
    SAFE_RELEASE(shadOutputSRV);
    SAFE_RELEASE(shadOutputRTV);
    SAFE_RELEASE(shaddepthOutput);
    SAFE_RELEASE(shaddepthOutputDSV);
    SAFE_RELEASE(shaddepthOutputDepthSRV);
    SAFE_RELEASE(shaddepthOutputStencilSRV);

    SAFE_RELEASE(depthCopyD24S8);
    SAFE_RELEASE(depthCopyD24S8_DepthSRV);
    SAFE_RELEASE(depthCopyD24S8_StencilSRV);

    SAFE_RELEASE(depthCopyD32S8);
    SAFE_RELEASE(depthCopyD32S8_DepthSRV);
    SAFE_RELEASE(depthCopyD32S8_StencilSRV);

    SAFE_RELEASE(depthCopyD32);
    SAFE_RELEASE(depthCopyD32_DepthSRV);

    SAFE_RELEASE(depthCopyD16);
    SAFE_RELEASE(depthCopyD16_DepthSRV);

    SAFE_RELEASE(srcxyCBuf);
    SAFE_RELEASE(shadoutsrcxyCBuf);
    SAFE_RELEASE(storeCBuf);

    return history;
  }

  {
    D3D11MarkerRegion pristine(
        StringFormat::Fmt("Replaying up to first event %u for pristine start", events[0].eventId));
    m_pDevice->ReplayLog(0, events[0].eventId, eReplay_WithoutDraw);
  }

  ID3D11RasterizerState *curRS = NULL;
  ID3D11RasterizerState *newRS = NULL;
  ID3D11DepthStencilState *newDS = NULL;
  ID3D11PixelShader *curPS = NULL;
  ID3D11ClassInstance *curInst[D3D11_SHADER_MAX_INTERFACES] = {NULL};
  UINT curNumInst = 0;
  UINT curNumViews = 0;
  UINT curNumScissors = 0;
  D3D11_VIEWPORT curViewports[16] = {0};
  D3D11_RECT curScissors[16] = {0};
  D3D11_RECT newScissors[16] = {0};
  ID3D11BlendState *curBS = NULL;
  float blendFactor[4] = {0};
  UINT curSample = 0;
  ID3D11DepthStencilState *curDS = NULL;
  UINT stencilRef = 0;

  ////////////////////////////////////////////////////////////////////////
  // Main loop over each event to determine if it rasterized to this pixel

  for(size_t ev = 0; ev < events.size(); ev++)
  {
    curNumInst = D3D11_SHADER_MAX_INTERFACES;
    curNumScissors = curNumViews = 16;

    D3D11MarkerRegion evmarker(StringFormat::Fmt("Processing output for %u", events[ev].eventId));

    bool uavOutput =
        ((events[ev].usage >= ResourceUsage::VS_RWResource &&
          events[ev].usage <= ResourceUsage::CS_RWResource) ||
         events[ev].usage == ResourceUsage::CopyDst || events[ev].usage == ResourceUsage::Copy ||
         events[ev].usage == ResourceUsage::Resolve ||
         events[ev].usage == ResourceUsage::ResolveDst || events[ev].usage == ResourceUsage::GenMips);

    m_pImmediateContext->RSGetState(&curRS);
    m_pImmediateContext->OMGetBlendState(&curBS, blendFactor, &curSample);
    m_pImmediateContext->OMGetDepthStencilState(&curDS, &stencilRef);
    m_pImmediateContext->PSGetShader(&curPS, curInst, &curNumInst);
    m_pImmediateContext->RSGetViewports(&curNumViews, curViewports);
    m_pImmediateContext->RSGetScissorRects(&curNumScissors, curScissors);

    // defaults (mostly)
    // disable tests/clips and enable scissor as we need it to clip visibility to just our pixel
    D3D11_RASTERIZER_DESC rd = {
        /*FillMode =*/D3D11_FILL_SOLID,
        /*CullMode =*/D3D11_CULL_NONE,
        /*FrontCounterClockwise =*/FALSE,
        /*DepthBias =*/D3D11_DEFAULT_DEPTH_BIAS,
        /*DepthBiasClamp =*/D3D11_DEFAULT_DEPTH_BIAS_CLAMP,
        /*SlopeScaledDepthBias =*/D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
        /*DepthClipEnable =*/FALSE,
        /*ScissorEnable =*/TRUE,
        /*MultisampleEnable =*/FALSE,
        /*AntialiasedLineEnable =*/FALSE,
    };

    D3D11_RASTERIZER_DESC rsDesc = {
        /*FillMode =*/D3D11_FILL_SOLID,
        /*CullMode =*/D3D11_CULL_BACK,
        /*FrontCounterClockwise =*/FALSE,
        /*DepthBias =*/D3D11_DEFAULT_DEPTH_BIAS,
        /*DepthBiasClamp =*/D3D11_DEFAULT_DEPTH_BIAS_CLAMP,
        /*SlopeScaledDepthBias =*/D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
        /*DepthClipEnable =*/TRUE,
        /*ScissorEnable =*/FALSE,
        /*MultisampleEnable =*/FALSE,
        /*AntialiasedLineEnable =*/FALSE,
    };

    if(curRS)
    {
      curRS->GetDesc(&rsDesc);

      rd = rsDesc;

      if(rd.CullMode != D3D11_CULL_NONE)
        flags[ev] |= TestEnabled_BackfaceCulling;
      if(rd.DepthClipEnable)
        flags[ev] |= TestEnabled_DepthClip;
      if(rd.ScissorEnable)
        flags[ev] |= TestEnabled_Scissor;

      rd.CullMode = D3D11_CULL_NONE;
      rd.DepthClipEnable = FALSE;

      rd.ScissorEnable = TRUE;
    }
    else
    {
      rsDesc.CullMode = D3D11_CULL_BACK;
      rsDesc.ScissorEnable = FALSE;

      // defaults
      flags[ev] |= (TestEnabled_BackfaceCulling | TestEnabled_DepthClip);
    }

    D3D11_COMPARISON_FUNC depthOp = D3D11_COMPARISON_LESS;

    if(curDS)
    {
      D3D11_DEPTH_STENCIL_DESC dsDesc;
      curDS->GetDesc(&dsDesc);

      if(dsDesc.DepthEnable)
      {
        if(dsDesc.DepthFunc != D3D11_COMPARISON_ALWAYS)
          flags[ev] |= TestEnabled_DepthTesting;

        if(dsDesc.DepthFunc == D3D11_COMPARISON_NEVER)
          flags[ev] |= TestMustFail_DepthTesting;

        depthOp = dsDesc.DepthFunc;
      }
      else
      {
        depthOp = D3D11_COMPARISON_ALWAYS;
      }

      if(dsDesc.StencilEnable)
      {
        if(dsDesc.FrontFace.StencilFunc != D3D11_COMPARISON_ALWAYS ||
           dsDesc.BackFace.StencilFunc != D3D11_COMPARISON_ALWAYS)
          flags[ev] |= TestEnabled_StencilTesting;

        if(dsDesc.FrontFace.StencilFunc == D3D11_COMPARISON_NEVER &&
           dsDesc.BackFace.StencilFunc == D3D11_COMPARISON_NEVER)
          flags[ev] |= TestMustFail_StencilTesting;

        if(dsDesc.FrontFace.StencilFunc == D3D11_COMPARISON_NEVER &&
           rsDesc.CullMode == D3D11_CULL_BACK)
          flags[ev] |= TestMustFail_StencilTesting;

        if(rsDesc.CullMode == D3D11_CULL_FRONT &&
           dsDesc.BackFace.StencilFunc == D3D11_COMPARISON_NEVER)
          flags[ev] |= TestMustFail_StencilTesting;
      }
    }
    else
    {
      // defaults
      flags[ev] |= TestEnabled_DepthTesting;
    }

    depthOps[events[ev].eventId] = depthOp;

    if(rsDesc.ScissorEnable)
    {
      // see if we can find at least one scissor region this pixel could fall into
      bool inRegion = false;
      bool inAllRegions = true;

      for(UINT i = 0; i < curNumScissors && i < curNumViews; i++)
      {
        if(xf >= float(curScissors[i].left) && yf >= float(curScissors[i].top) &&
           xf < float(curScissors[i].right) && yf < float(curScissors[i].bottom))
        {
          inRegion = true;
        }
        else
        {
          inAllRegions = false;
        }
      }

      if(!inRegion)
        flags[ev] |= TestMustFail_Scissor;
      if(inAllRegions)
        flags[ev] |= TestMustPass_Scissor;
    }

    if(curBS)
    {
      D3D11_BLEND_DESC desc;
      curBS->GetDesc(&desc);

      if(desc.IndependentBlendEnable)
      {
        for(int i = 0; i < 8; i++)
        {
          if(desc.RenderTarget[i].BlendEnable)
          {
            flags[ev] |= Blending_Enabled;
            break;
          }
        }
      }
      else
      {
        if(desc.RenderTarget[0].BlendEnable)
          flags[ev] |= Blending_Enabled;
      }
    }
    else
    {
      // no blending enabled by default
    }

    // sampleMask is a mask containing only the bit for the sample we want
    // (or 0xFFFFFFFF if no sample was chosen and we are looking at them all).
    if((curSample & sampleMask) == 0)
    {
      flags[ev] |= TestMustFail_SampleMask;
    }

    if(!m_pImmediateContext->GetCurrentPipelineState()->PredicationWouldPass())
      flags[ev] |= Predication_Failed;

    m_pDevice->CreateRasterizerState(&rd, &newRS);
    m_pImmediateContext->RSSetState(newRS);
    SAFE_RELEASE(newRS);

    m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);

    m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
    m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.NopDepthState, stencilRef);

    for(UINT i = 0; i < curNumViews; i++)
    {
      // calculate scissor, relative to this viewport, that encloses only (x,y) pixel

      // if the (x,y) pixel isn't in this viewport, make an empty rect
      if(xf < curViewports[i].TopLeftX || yf < curViewports[i].TopLeftY ||
         xf >= curViewports[i].TopLeftX + curViewports[i].Width ||
         yf >= curViewports[i].TopLeftY + curViewports[i].Height)
      {
        newScissors[i].left = newScissors[i].top = newScissors[i].bottom = newScissors[i].right = 0;
      }
      else
      {
        newScissors[i].left = LONG(x);
        newScissors[i].top = LONG(y);
        newScissors[i].right = newScissors[i].left + 1;
        newScissors[i].bottom = newScissors[i].top + 1;
      }
    }

    // scissor every viewport
    m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

    // figure out where this event lies in the pixstore buffer

    bool depthBound = false;
    ID3D11Texture2D **copyTex = NULL;
    ID3D11ShaderResourceView **copyDepthSRV = NULL;
    ID3D11ShaderResourceView **copyStencilSRV = NULL;
    ID3D11Resource *depthRes = NULL;

    // if the depth resource was already BIND_SRV we just create these SRVs pointing to it,
    // then release them after, instead of using srvs to texture copies
    ID3D11ShaderResourceView *releaseDepthSRV = NULL;
    ID3D11ShaderResourceView *releaseStencilSRV = NULL;

    {
      ID3D11DepthStencilView *dsv = NULL;
      m_pImmediateContext->OMGetRenderTargets(0, NULL, &dsv);

      if(dsv)
      {
        depthBound = true;

        dsv->GetResource(&depthRes);

        D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc;
        dsv->GetDesc(&dsvDesc);

        SAFE_RELEASE(dsv);

        D3D11_RESOURCE_DIMENSION dim;
        depthRes->GetType(&dim);

        D3D11_TEXTURE2D_DESC desc2d;
        RDCEraseEl(desc2d);

        if(dim == D3D11_RESOURCE_DIMENSION_TEXTURE1D)
        {
          ID3D11Texture1D *tex = (ID3D11Texture1D *)depthRes;
          D3D11_TEXTURE1D_DESC desc1d;
          tex->GetDesc(&desc1d);

          desc2d.Format = desc1d.Format;
          desc2d.Width = desc1d.Width;
          desc2d.Height = 1;
          desc2d.BindFlags = desc1d.BindFlags;
        }
        else if(dim == D3D11_RESOURCE_DIMENSION_TEXTURE2D)
        {
          ID3D11Texture2D *tex = (ID3D11Texture2D *)depthRes;
          tex->GetDesc(&desc2d);
        }
        else
        {
          RDCERR("Unexpected size of depth buffer");
        }

        depthFormats[events[ev].eventId] = desc2d.Format;

        bool srvable = (dim == D3D11_RESOURCE_DIMENSION_TEXTURE2D) &&
                       (desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE) > 0;

        D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
        srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
        if(dsvDesc.ViewDimension == D3D11_DSV_DIMENSION_TEXTURE2DMS)
          srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DMS;
        srvDesc.Texture2D.MipLevels = 1;
        srvDesc.Texture2D.MostDetailedMip = dsvDesc.Texture2D.MipSlice;

        D3D11_TEXTURE2D_DESC *copyDesc = NULL;
        if(desc2d.Format == DXGI_FORMAT_R16_FLOAT || desc2d.Format == DXGI_FORMAT_R16_SINT ||
           desc2d.Format == DXGI_FORMAT_R16_UINT || desc2d.Format == DXGI_FORMAT_R16_SNORM ||
           desc2d.Format == DXGI_FORMAT_R16_UNORM || desc2d.Format == DXGI_FORMAT_R16_TYPELESS ||
           desc2d.Format == DXGI_FORMAT_D16_UNORM)
        {
          copyDesc = &depthCopyD16Desc;
          copyTex = &depthCopyD16;
          copyDepthSRV = &depthCopyD16_DepthSRV;
          copyStencilSRV = NULL;

          copyDepthSRVDesc.Format = DXGI_FORMAT_R16_UNORM;

          if(srvable)
          {
            srvDesc.Format = DXGI_FORMAT_R16_UNORM;

            copyTex = (ID3D11Texture2D **)&depthRes;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseDepthSRV);
            copyDepthSRV = &releaseDepthSRV;
          }
        }
        else if(desc2d.Format == DXGI_FORMAT_R24_UNORM_X8_TYPELESS ||
                desc2d.Format == DXGI_FORMAT_R24G8_TYPELESS ||
                desc2d.Format == DXGI_FORMAT_D24_UNORM_S8_UINT)
        {
          copyDesc = &depthCopyD24S8Desc;
          copyTex = &depthCopyD24S8;
          copyDepthSRV = &depthCopyD24S8_DepthSRV;
          copyStencilSRV = &depthCopyD24S8_StencilSRV;

          copyDepthSRVDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
          copyStencilSRVDesc.Format = DXGI_FORMAT_X24_TYPELESS_G8_UINT;

          if(srvable)
          {
            srvDesc.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;

            copyTex = (ID3D11Texture2D **)&depthRes;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseDepthSRV);
            copyDepthSRV = &releaseDepthSRV;
            srvDesc.Format = DXGI_FORMAT_X24_TYPELESS_G8_UINT;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseStencilSRV);
            copyStencilSRV = &releaseStencilSRV;
          }
        }
        else if(desc2d.Format == DXGI_FORMAT_R32_FLOAT || desc2d.Format == DXGI_FORMAT_R32_SINT ||
                desc2d.Format == DXGI_FORMAT_R32_UINT ||
                desc2d.Format == DXGI_FORMAT_R32_TYPELESS || desc2d.Format == DXGI_FORMAT_D32_FLOAT)
        {
          copyDesc = &depthCopyD32Desc;
          copyTex = &depthCopyD32;
          copyDepthSRV = &depthCopyD32_DepthSRV;
          copyStencilSRV = NULL;

          copyDepthSRVDesc.Format = DXGI_FORMAT_R32_FLOAT;

          if(srvable)
          {
            srvDesc.Format = DXGI_FORMAT_R32_FLOAT;

            copyTex = (ID3D11Texture2D **)&depthRes;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseDepthSRV);
            copyDepthSRV = &releaseDepthSRV;
          }
        }
        else if(desc2d.Format == DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS ||
                desc2d.Format == DXGI_FORMAT_R32G8X24_TYPELESS ||
                desc2d.Format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT)
        {
          copyDesc = &depthCopyD32S8Desc;
          copyTex = &depthCopyD32S8;
          copyDepthSRV = &depthCopyD32S8_DepthSRV;
          copyStencilSRV = &depthCopyD32S8_StencilSRV;

          copyDepthSRVDesc.Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
          copyStencilSRVDesc.Format = DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;

          if(srvable)
          {
            srvDesc.Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;

            copyTex = (ID3D11Texture2D **)&depthRes;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseDepthSRV);
            copyDepthSRV = &releaseDepthSRV;
            srvDesc.Format = DXGI_FORMAT_X32_TYPELESS_G8X24_UINT;
            m_pDevice->CreateShaderResourceView(depthRes, &srvDesc, &releaseStencilSRV);
            copyStencilSRV = &releaseStencilSRV;
          }
        }

        if(!srvable &&
           (*copyTex == NULL || desc2d.Width > copyDesc->Width || desc2d.Height > copyDesc->Height))
        {
          // recreate texture
          SAFE_RELEASE(*copyTex);
          SAFE_RELEASE(*copyDepthSRV);
          if(copyStencilSRV)
            SAFE_RELEASE(*copyStencilSRV);

          m_pDevice->CreateTexture2D(copyDesc, NULL, copyTex);
          m_pDevice->CreateShaderResourceView(*copyTex, &copyDepthSRVDesc, copyDepthSRV);
          if(copyStencilSRV)
            m_pDevice->CreateShaderResourceView(*copyTex, &copyStencilSRVDesc, copyStencilSRV);
        }
      }
    }

    D3D11MarkerRegion::Set("Copying pre-mod[0] col");
    GetDebugManager()->PixelHistoryCopyPixel(colourCopyParams, ev, 0);

    depthCopyParams.depthbound = depthBound;
    depthCopyParams.sourceTex = (ID3D11Texture2D *)depthRes;
    depthCopyParams.srvTex = copyTex ? *copyTex : NULL;
    depthCopyParams.srv[0] = copyDepthSRV ? *copyDepthSRV : NULL;
    depthCopyParams.srv[1] = copyStencilSRV ? *copyStencilSRV : NULL;

    D3D11MarkerRegion::Set("Copying pre-mod[0] depth");
    GetDebugManager()->PixelHistoryCopyPixel(depthCopyParams, ev, 0);

    m_pImmediateContext->Begin(occl[ev]);

    // For UAV output we only want to replay once in pristine conditions (only fetching before/after
    // values)
    if(!uavOutput)
      m_pDevice->ReplayLog(0, events[ev].eventId, eReplay_OnlyDraw);

    m_pImmediateContext->End(occl[ev]);

    // determine how many fragments returned from the shader
    if(!uavOutput)
    {
      D3D11_RASTERIZER_DESC rdsc = rsDesc;

      rdsc.ScissorEnable = TRUE;
      // leave depth clip mode as normal
      // leave backface culling mode as normal

      m_pDevice->CreateRasterizerState(&rdsc, &newRS);

      m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
      m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassIncrDepthState, stencilRef);
      m_pImmediateContext->RSSetState(newRS);

      SAFE_RELEASE(newRS);

      ID3D11RenderTargetView *tmpViews[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};
      m_pImmediateContext->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, tmpViews, NULL);

      uint32_t UAVStartSlot = 0;
      for(int i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
      {
        if(tmpViews[i] != NULL)
        {
          UAVStartSlot = i + 1;
          SAFE_RELEASE(tmpViews[i]);
        }
      }

      ID3D11RenderTargetView *prevRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};
      ID3D11UnorderedAccessView *prevUAVs[D3D11_1_UAV_SLOT_COUNT] = {0};
      ID3D11DepthStencilView *prevDSV = NULL;
      const UINT numUAVs =
          m_pImmediateContext->IsFL11_1() ? D3D11_1_UAV_SLOT_COUNT : D3D11_PS_CS_UAV_REGISTER_COUNT;
      m_pImmediateContext->OMGetRenderTargetsAndUnorderedAccessViews(
          UAVStartSlot, prevRTVs, &prevDSV, UAVStartSlot, numUAVs - UAVStartSlot, prevUAVs);

      D3D11CopyPixelParams params = depthCopyParams;
      params.depthbound = true;
      params.srvTex = params.sourceTex = shaddepthOutput;
      params.srv[0] = shaddepthOutputDepthSRV;
      params.srv[1] = shaddepthOutputStencilSRV;

      D3D11MarkerRegion::Set("Clearing depth/stencil for frag counting");
      m_pImmediateContext->ClearDepthStencilView(shaddepthOutputDSV, D3D11_CLEAR_STENCIL, 1.0f, 0);

      m_pImmediateContext->OMSetRenderTargets(0, NULL, shaddepthOutputDSV);

      // replay first with overlay shader. This is guaranteed to count all fragments
      D3D11MarkerRegion::Set("Counting all fragments[2]");
      m_pDevice->ReplayLog(0, events[ev].eventId, eReplay_OnlyDraw);
      GetDebugManager()->PixelHistoryCopyPixel(params, ev, 2);

      m_pImmediateContext->PSSetShader(curPS, curInst, curNumInst);

      m_pImmediateContext->ClearDepthStencilView(shaddepthOutputDSV, D3D11_CLEAR_STENCIL, 1.0f, 0);

      // now replay with original shader. Some fragments may discard and not be counted
      D3D11MarkerRegion::Set("Counting discarded fragments[3]");
      m_pDevice->ReplayLog(0, events[ev].eventId, eReplay_OnlyDraw);
      GetDebugManager()->PixelHistoryCopyPixel(params, ev, 3);

      UINT initCounts[D3D11_1_UAV_SLOT_COUNT];
      memset(&initCounts[0], 0xff, sizeof(initCounts));

      m_pImmediateContext->OMSetRenderTargetsAndUnorderedAccessViews(
          UAVStartSlot, prevRTVs, prevDSV, UAVStartSlot, numUAVs - UAVStartSlot, prevUAVs,
          initCounts);

      for(int i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
        SAFE_RELEASE(prevRTVs[i]);
      for(int i = 0; i < D3D11_1_UAV_SLOT_COUNT; i++)
        SAFE_RELEASE(prevUAVs[i]);
      SAFE_RELEASE(prevDSV);
    }
    else
    {
      m_pImmediateContext->PSSetShader(curPS, curInst, curNumInst);
    }

    m_pImmediateContext->RSSetState(curRS);
    m_pImmediateContext->RSSetScissorRects(curNumScissors, curScissors);
    m_pImmediateContext->OMSetBlendState(curBS, blendFactor, curSample);
    m_pImmediateContext->OMSetDepthStencilState(curDS, stencilRef);

    for(UINT i = 0; i < curNumInst; i++)
      SAFE_RELEASE(curInst[i]);

    SAFE_RELEASE(curPS);
    SAFE_RELEASE(curRS);
    SAFE_RELEASE(curBS);
    SAFE_RELEASE(curDS);

    // replay only action to get immediately post-modification values
    m_pDevice->ReplayLog(events[ev].eventId, events[ev].eventId, eReplay_OnlyDraw);

    D3D11MarkerRegion::Set("Copying post-mod col/depth[1]");
    GetDebugManager()->PixelHistoryCopyPixel(colourCopyParams, ev, 1);
    GetDebugManager()->PixelHistoryCopyPixel(depthCopyParams, ev, 1);

    SAFE_RELEASE(releaseDepthSRV);
    SAFE_RELEASE(releaseStencilSRV);

    if(ev < events.size() - 1)
    {
      D3D11MarkerRegion continuation(
          StringFormat::Fmt("Replaying partial continuation from %u to %u", events[ev].eventId + 1,
                            events[ev + 1].eventId));
      m_pDevice->ReplayLog(events[ev].eventId + 1, events[ev + 1].eventId, eReplay_WithoutDraw);
    }

    SAFE_RELEASE(depthRes);
  }

  ////////////////////////////////////////////////////////////////////////
  // Second loop over each event to determine if the above query returned
  // true, and to narrow down which tests (if any) it failed

  for(size_t i = 0; i < occl.size(); i++)
  {
    do
    {
      hr = m_pImmediateContext->GetData(occl[i], &occlData, sizeof(occlData), 0);
    } while(hr == S_FALSE);
    RDCASSERTEQUAL(hr, S_OK);

    ResourceRange resourceRange(targetres, mip, slice);

    const ActionDescription *action = m_pDevice->GetAction(events[i].eventId);

    bool clear = bool(action->flags & ActionFlags::Clear);

    bool uavWrite =
        ((events[i].usage >= ResourceUsage::VS_RWResource &&
          events[i].usage <= ResourceUsage::CS_RWResource) ||
         events[i].usage == ResourceUsage::CopyDst || events[i].usage == ResourceUsage::Copy ||
         events[i].usage == ResourceUsage::Resolve ||
         events[i].usage == ResourceUsage::ResolveDst || events[i].usage == ResourceUsage::GenMips);

    if(events[i].view != ResourceId())
    {
      // if the access is through a view, check the mip/slice matches
      bool used = false;

      ID3D11DeviceChild *view = m_pDevice->GetResourceManager()->GetCurrentResource(events[i].view);

      if(WrappedID3D11RenderTargetView1::IsAlloc(view))
      {
        WrappedID3D11RenderTargetView1 *rtv = (WrappedID3D11RenderTargetView1 *)view;

        if(rtv->GetResourceRange().Intersects(resourceRange))
          used = true;
      }
      else if(WrappedID3D11DepthStencilView::IsAlloc(view))
      {
        WrappedID3D11DepthStencilView *dsv = (WrappedID3D11DepthStencilView *)view;

        if(dsv->GetResourceRange().Intersects(resourceRange))
          used = true;
      }
      else if(WrappedID3D11ShaderResourceView1::IsAlloc(view))
      {
        WrappedID3D11ShaderResourceView1 *srv = (WrappedID3D11ShaderResourceView1 *)view;

        if(srv->GetResourceRange().Intersects(resourceRange))
          used = true;
      }
      else if(WrappedID3D11UnorderedAccessView1::IsAlloc(view))
      {
        WrappedID3D11UnorderedAccessView1 *uav = (WrappedID3D11UnorderedAccessView1 *)view;

        if(uav->GetResourceRange().Intersects(resourceRange))
          used = true;
      }
      else
      {
        RDCWARN("Unexpected view type, ID %s. Assuming used...", ToStr(events[i].view).c_str());
        used = true;
      }

      if(!used)
      {
        RDCDEBUG("Usage %d at %u didn't refer to the matching mip/slice (%u/%u)", events[i].usage,
                 events[i].eventId, mip, slice);
        occlData = 0;
        clear = uavWrite = false;
      }
    }

    if(occlData > 0 || clear || uavWrite)
    {
      PixelModification mod;
      RDCEraseEl(mod);

      mod.eventId = events[i].eventId;

      mod.directShaderWrite = uavWrite;
      mod.unboundPS = false;

      mod.preMod.col.uintValue[0] = (uint32_t)i;

      if(!(action->flags & ActionFlags::Clear) && !uavWrite)
      {
        if(flags[i] & TestMustFail_DepthTesting)
          mod.depthTestFailed = true;
        if(flags[i] & TestMustFail_StencilTesting)
          mod.stencilTestFailed = true;
        if(flags[i] & TestMustFail_Scissor)
          mod.scissorClipped = true;
        if(flags[i] & TestMustFail_SampleMask)
          mod.sampleMasked = true;
        if(flags[i] & Predication_Failed)
          mod.predicationSkipped = true;

        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Replaying up to event %u for pristine start", events[i].eventId));
          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);
        }

        {
          ID3D11RenderTargetView *tmpViews[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};
          m_pImmediateContext->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, tmpViews,
                                                  NULL);

          uint32_t UAVStartSlot = 0;
          for(int v = 0; v < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; v++)
          {
            if(tmpViews[v] != NULL)
            {
              UAVStartSlot = v + 1;
              SAFE_RELEASE(tmpViews[v]);
            }
          }

          ID3D11RenderTargetView *curRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};
          ID3D11UnorderedAccessView *curUAVs[D3D11_1_UAV_SLOT_COUNT] = {0};
          ID3D11DepthStencilView *curDSV = NULL;
          const UINT numUAVs = m_pImmediateContext->IsFL11_1() ? D3D11_1_UAV_SLOT_COUNT
                                                               : D3D11_PS_CS_UAV_REGISTER_COUNT;
          m_pImmediateContext->OMGetRenderTargetsAndUnorderedAccessViews(
              UAVStartSlot, curRTVs, &curDSV, UAVStartSlot, numUAVs - UAVStartSlot, curUAVs);

          // release these now in case we skip this modification, but don't NULL them
          // so we can still compare
          {
            for(int rtv = 0; rtv < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; rtv++)
              if(curRTVs[rtv])
                curRTVs[rtv]->Release();

            for(int uav = 0; uav < D3D11_1_UAV_SLOT_COUNT; uav++)
              if(curUAVs[uav])
                curUAVs[uav]->Release();

            if(curDSV)
              curDSV->Release();
          }
        }

        curNumScissors = curNumViews = 16;
        m_pImmediateContext->RSGetViewports(&curNumViews, curViewports);
        m_pImmediateContext->RSGetScissorRects(&curNumScissors, curScissors);
        m_pImmediateContext->RSGetState(&curRS);
        m_pImmediateContext->OMGetDepthStencilState(&curDS, &stencilRef);
        blendFactor[0] = blendFactor[1] = blendFactor[2] = blendFactor[3] = 1.0f;
        curSample = ~0U;

        D3D11_RASTERIZER_DESC rdesc = {
            /*FillMode =*/D3D11_FILL_SOLID,
            /*CullMode =*/D3D11_CULL_BACK,
            /*FrontCounterClockwise =*/FALSE,
            /*DepthBias =*/D3D11_DEFAULT_DEPTH_BIAS,
            /*DepthBiasClamp =*/D3D11_DEFAULT_DEPTH_BIAS_CLAMP,
            /*SlopeScaledDepthBias =*/D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
            /*DepthClipEnable =*/TRUE,
            /*ScissorEnable =*/FALSE,
            /*MultisampleEnable =*/FALSE,
            /*AntialiasedLineEnable =*/FALSE,
        };
        if(curRS)
          curRS->GetDesc(&rdesc);

        SAFE_RELEASE(curRS);

        D3D11_DEPTH_STENCIL_DESC dsdesc = {
            /*DepthEnable =*/TRUE,
            /*DepthWriteMask =*/D3D11_DEPTH_WRITE_MASK_ALL,
            /*DepthFunc =*/D3D11_COMPARISON_LESS,
            /*StencilEnable =*/FALSE,
            /*StencilReadMask =*/D3D11_DEFAULT_STENCIL_READ_MASK,
            /*StencilWriteMask =*/D3D11_DEFAULT_STENCIL_WRITE_MASK,
            /*FrontFace =*/{D3D11_STENCIL_OP_KEEP, D3D11_STENCIL_OP_KEEP, D3D11_STENCIL_OP_KEEP, D3D11_COMPARISON_ALWAYS},
            /*BackFace =*/{D3D11_STENCIL_OP_KEEP, D3D11_STENCIL_OP_KEEP, D3D11_STENCIL_OP_KEEP, D3D11_COMPARISON_ALWAYS},
        };

        if(curDS)
          curDS->GetDesc(&dsdesc);

        SAFE_RELEASE(curDS);

        for(UINT v = 0; v < curNumViews; v++)
        {
          // calculate scissor, relative to this viewport, that encloses only (x,y) pixel

          // if the (x,y) pixel isn't in this viewport, make an empty rect
          if(xf < curViewports[v].TopLeftX || yf < curViewports[v].TopLeftY ||
             xf >= curViewports[v].TopLeftX + curViewports[v].Width ||
             yf >= curViewports[v].TopLeftY + curViewports[v].Height)
          {
            newScissors[v].left = newScissors[v].top = newScissors[v].bottom =
                newScissors[v].right = 0;
          }
          else
          {
            newScissors[v].left = LONG(x);
            newScissors[v].top = LONG(y);
            newScissors[v].right = newScissors[v].left + 1;
            newScissors[v].bottom = newScissors[v].top + 1;
          }
        }

        // for each test we only disable pipeline rejection tests that fall *after* it.
        // e.g. to get an idea if a pixel failed backface culling or not, we enable only backface
        // culling and disable everything else (since it happens first).
        // For depth testing, we leave all tests enabled up to then - as we only want to know which
        // pixels were rejected by the depth test, not pixels that might have passed the depth test
        // had they not been discarded earlier by backface culling or depth clipping.

        // test shader discard
        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Test shader discard in event %u", events[i].eventId));
          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          // leave depth clip mode as normal
          // leave backface culling mode as normal

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassDepthState, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

          m_pImmediateContext->Begin(testQueries[3]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[3]);

          SAFE_RELEASE(newRS);
        }

        if(flags[i] & TestEnabled_BackfaceCulling)
        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Test backface culling in event %u", events[i].eventId));
          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          rd.DepthClipEnable = FALSE;
          // leave backface culling mode as normal

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassDepthState, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

          m_pImmediateContext->Begin(testQueries[0]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[0]);

          SAFE_RELEASE(newRS);
        }

        if(flags[i] & TestEnabled_DepthClip)
        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Test depth clipping in event %u", events[i].eventId));
          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          // leave depth clip mode as normal
          // leave backface culling mode as normal

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassDepthState, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

          m_pImmediateContext->Begin(testQueries[1]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[1]);

          SAFE_RELEASE(newRS);
        }

        // only check scissor if test is enabled and we don't know if it's pass or fail yet
        if((flags[i] & (TestEnabled_Scissor | TestMustPass_Scissor | TestMustFail_Scissor)) ==
           TestEnabled_Scissor)
        {
          D3D11MarkerRegion pristine(StringFormat::Fmt("Test scissor in event %u", events[i].eventId));
          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          // leave depth clip mode as normal
          // leave backface culling mode as normal

          // newScissors has scissor regions calculated to hit our target pixel on every viewport,
          // but we must intersect that with the original scissors regions for correct testing
          // behaviour. This amounts to making any scissor region that doesn't overlap with the
          // target pixel empty.
          //
          // Note that in the case of only one scissor region we can trivially detect pass/fail of
          // the test against our pixel on the CPU so we won't come in here (see check above against
          // MustFail/MustPass). So we will only do this in the case where we have multiple scissor
          // regions/viewports, some intersecting the pixel and some not. So we make the not
          // intersecting scissor regions empty so our occlusion query tests to see if any pixels
          // were written to the "passing" viewports
          D3D11_RECT intersectScissors[16] = {0};
          memcpy(intersectScissors, newScissors, sizeof(intersectScissors));

          for(UINT s = 0; s < curNumScissors; s++)
          {
            if(curScissors[s].left > newScissors[s].left ||
               curScissors[s].right < newScissors[s].right ||
               curScissors[s].top > newScissors[s].top ||
               curScissors[s].bottom < newScissors[s].bottom)
            {
              // scissor region from the log doesn't touch our target pixel, make empty.
              intersectScissors[s].left = intersectScissors[s].right = intersectScissors[s].top =
                  intersectScissors[s].bottom = 0;
            }
          }

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassDepthState, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumScissors, intersectScissors);

          m_pImmediateContext->Begin(testQueries[2]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[2]);

          SAFE_RELEASE(newRS);
        }

        if(flags[i] & TestEnabled_DepthTesting)
        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Test depth testing in event %u", events[i].eventId));

          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          // leave depth clip mode as normal
          // leave backface culling mode as normal

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          D3D11_DEPTH_STENCIL_DESC dsd = dsdesc;

          // make stencil trivially pass
          dsd.StencilEnable = TRUE;
          dsd.StencilReadMask = 0xff;
          dsd.StencilWriteMask = 0xff;
          dsd.FrontFace.StencilDepthFailOp = dsd.FrontFace.StencilFailOp =
              dsd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
          dsd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
          dsd.BackFace.StencilDepthFailOp = dsd.BackFace.StencilFailOp =
              dsd.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
          dsd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS;

          m_pDevice->CreateDepthStencilState(&dsd, &newDS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(newDS, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

          m_pImmediateContext->Begin(testQueries[4]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[4]);

          SAFE_RELEASE(newRS);
          SAFE_RELEASE(newDS);
        }

        if(flags[i] & TestEnabled_StencilTesting)
        {
          D3D11MarkerRegion pristine(
              StringFormat::Fmt("Test stencil testing in event %u", events[i].eventId));

          D3D11_RASTERIZER_DESC rd = rdesc;

          rd.ScissorEnable = TRUE;
          rd.DepthClipEnable = FALSE;
          rd.CullMode = D3D11_CULL_NONE;

          m_pDevice->CreateRasterizerState(&rd, &newRS);

          // leave depthstencil testing exactly as is, because a depth-fail means
          // stencil isn't run
          m_pDevice->CreateDepthStencilState(&dsdesc, &newDS);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_WithoutDraw);

          m_pImmediateContext->PSSetShader(m_General.FixedColPS, NULL, 0);
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetDepthStencilState(newDS, stencilRef);
          m_pImmediateContext->RSSetState(newRS);
          m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

          m_pImmediateContext->Begin(testQueries[5]);

          m_pDevice->ReplayLog(0, events[i].eventId, eReplay_OnlyDraw);

          m_pImmediateContext->End(testQueries[5]);

          SAFE_RELEASE(newRS);
          SAFE_RELEASE(newDS);
        }

        // we check these in the order defined, as a positive from the backface cull test
        // will invalidate tests later (as they will also be backface culled)

        do
        {
          if(flags[i] & TestEnabled_BackfaceCulling)
          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[0], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.backfaceCulled = (occlData == 0);

            if(mod.backfaceCulled)
              break;
          }

          if(flags[i] & TestEnabled_DepthClip)
          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[1], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.depthClipped = (occlData == 0);

            if(mod.depthClipped)
              break;
          }

          if(!mod.backfaceCulled && (flags[i] & (TestEnabled_Scissor | TestMustPass_Scissor |
                                                 TestMustFail_Scissor)) == TestEnabled_Scissor)
          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[2], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.scissorClipped = (occlData == 0);

            if(mod.scissorClipped)
              break;
          }

          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[3], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.shaderDiscarded = (occlData == 0);

            if(mod.shaderDiscarded)
              break;
          }

          if(flags[i] & TestEnabled_DepthTesting)
          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[4], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.depthTestFailed = (occlData == 0);

            if(mod.depthTestFailed)
              break;
          }

          if(flags[i] & TestEnabled_StencilTesting)
          {
            do
            {
              hr = m_pImmediateContext->GetData(testQueries[5], &occlData, sizeof(occlData), 0);
            } while(hr == S_FALSE);
            RDCASSERTEQUAL(hr, S_OK);

            mod.stencilTestFailed = (occlData == 0);

            if(mod.stencilTestFailed)
              break;
          }
        } while((void)0, 0);
      }

      history.push_back(mod);

      RDCDEBUG("Event %u is visible, %llu samples visible", events[i].eventId, (UINT64)occlData);
    }

    SAFE_RELEASE(occl[i]);
  }

  m_pImmediateContext->CopyResource(pixstoreReadback, pixstore);
  m_pImmediateContext->CopyResource(pixstoreDepthReadback, pixstoreDepth);

  D3D11_MAPPED_SUBRESOURCE mapped = {0};
  m_pImmediateContext->Map(pixstoreReadback, 0, D3D11_MAP_READ, 0, &mapped);

  D3D11_MAPPED_SUBRESOURCE mappedDepth = {0};
  m_pImmediateContext->Map(pixstoreDepthReadback, 0, D3D11_MAP_READ, 0, &mappedDepth);

  byte *pixstoreDepthData = (byte *)mappedDepth.pData;
  byte *pixstoreData = (byte *)mapped.pData;

  ////////////////////////////////////////////////////////////////////////////////////////
  // Third loop over each modification event to read back the pre-action colour + depth data
  // as well as the # fragments to use in the next step

  for(size_t h = 0; h < history.size(); h++)
  {
    PixelModification &mod = history[h];

    uint32_t pre = mod.preMod.col.uintValue[0];

    mod.preMod.col.uintValue[0] = 0;

    // the UAV copy on the GPU expanded out to full size when writing to the buffer, so we can now
    // just copy across without needing to interpret according to the format.
    {
      byte *data = pixstoreData + sizeof(Vec4f) * pixstoreStride * pre;
      memcpy(&mod.preMod.col.uintValue[0], data, sizeof(Vec4f));
      memcpy(&mod.postMod.col.uintValue[0], data + sizeof(Vec4f), sizeof(Vec4f));
    }

    {
      Vec4f *data = (Vec4f *)(pixstoreDepthData + sizeof(Vec4f) * pixstoreStride * pre);

      mod.preMod.depth = data[0].x;
      mod.preMod.stencil = int32_t(data[0].y);

      mod.postMod.depth = data[1].x;
      mod.postMod.stencil = int32_t(data[1].y);

      // data[2].x (depth) unused
      // fragments writing to the pixel in this event with overlay shader
      mod.shaderOut.col.intValue[0] = int32_t(data[2].y);

      // data[3].x (depth) unused
      // fragments writing to the pixel in this event with original shader
      mod.shaderOut.col.intValue[1] = int32_t(data[3].y);
    }
  }

  m_pImmediateContext->Unmap(pixstoreDepthReadback, 0);
  m_pImmediateContext->Unmap(pixstoreReadback, 0);

  /////////////////////////////////////////////////////////////////////////
  // simple loop to expand out the history events by number of fragments,
  // duplicating and setting fragIndex in each

  for(size_t h = 0; h < history.size();)
  {
    int32_t frags = RDCMAX(1, history[h].shaderOut.col.intValue[0]);
    int32_t fragsClipped = RDCCLAMP(history[h].shaderOut.col.intValue[1], 1, frags);

    // if we have fewer fragments with the original shader, some discarded
    // so we need to do a thorough check to see which fragments discarded
    bool someFragsClipped = (fragsClipped < frags);

    PixelModification mod = history[h];

    for(int32_t f = 1; f < frags; f++)
      history.insert(h + 1, mod);

    for(int32_t f = 0; f < frags; f++)
    {
      history[h + f].fragIndex = f;
      history[h + f].primitiveID = someFragsClipped;
    }

    h += frags;
  }

  uint32_t prev = 0;

  /////////////////////////////////////////////////////////////////////////
  // loop for each fragment, for non-final fragments fetch the post-output
  // buffer value, and for each fetch the shader output value

  uint32_t postColSlot = 0;
  uint32_t shadColSlot = 0;
  uint32_t depthSlot = 0;

  uint32_t rtIndex = 100000;
  ID3D11RenderTargetView *RTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};

  ID3D11DepthStencilState *ds = NULL;

  D3D11CopyPixelParams shadoutCopyParams = colourCopyParams;
  shadoutCopyParams.sourceTex = shadoutCopyParams.srvTex = shadOutput;
  shadoutCopyParams.srv[0] = shadOutputSRV;
  shadoutCopyParams.uav = shadoutStoreUAV;
  shadoutCopyParams.srcxyCBuf = shadoutsrcxyCBuf;

  depthCopyParams.sourceTex = depthCopyParams.srvTex = shaddepthOutput;
  depthCopyParams.srv[0] = shaddepthOutputDepthSRV;
  depthCopyParams.srv[1] = shaddepthOutputStencilSRV;

  for(size_t h = 0; h < history.size(); h++)
  {
    const ActionDescription *action = m_pDevice->GetAction(history[h].eventId);

    if(action->flags & ActionFlags::Clear)
      continue;

    D3D11MarkerRegion historyData(StringFormat::Fmt("Fetching history data for %u", action->eventId));

    if(prev != history[h].eventId)
    {
      D3D11MarkerRegion predraw("fetching pre-action");

      m_pDevice->ReplayLog(0, history[h].eventId, eReplay_WithoutDraw);
      prev = history[h].eventId;

      curNumScissors = curNumViews = 16;
      m_pImmediateContext->RSGetViewports(&curNumViews, curViewports);

      for(UINT v = 0; v < curNumViews; v++)
      {
        // calculate scissor, relative to this viewport, that encloses only (x,y) pixel

        // if (x,y) pixel isn't in viewport, make an empty rect
        if(xf < curViewports[v].TopLeftX || yf < curViewports[v].TopLeftY ||
           xf >= curViewports[v].TopLeftX + curViewports[v].Width ||
           yf >= curViewports[v].TopLeftY + curViewports[v].Height)
        {
          newScissors[v].left = newScissors[v].top = newScissors[v].bottom = newScissors[v].right = 0;
        }
        else
        {
          newScissors[v].left = LONG(x);
          newScissors[v].top = LONG(y);
          newScissors[v].right = newScissors[v].left + 1;
          newScissors[v].bottom = newScissors[v].top + 1;
        }
      }

      m_pImmediateContext->RSSetScissorRects(curNumViews, newScissors);

      m_pImmediateContext->RSGetState(&curRS);

      D3D11_RASTERIZER_DESC rdesc = {
          /*FillMode =*/D3D11_FILL_SOLID,
          /*CullMode =*/D3D11_CULL_BACK,
          /*FrontCounterClockwise =*/FALSE,
          /*DepthBias =*/D3D11_DEFAULT_DEPTH_BIAS,
          /*DepthBiasClamp =*/D3D11_DEFAULT_DEPTH_BIAS_CLAMP,
          /*SlopeScaledDepthBias =*/D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
          /*DepthClipEnable =*/TRUE,
          /*ScissorEnable =*/FALSE,
          /*MultisampleEnable =*/FALSE,
          /*AntialiasedLineEnable =*/FALSE,
      };
      if(curRS)
        curRS->GetDesc(&rdesc);

      SAFE_RELEASE(curRS);

      m_pImmediateContext->OMGetDepthStencilState(&curDS, &stencilRef);

      // make a depth-stencil state object that writes to depth, uses same comparison
      // as currently set, and tests stencil INCR_SAT / GREATER_EQUAL for fragment selection
      D3D11_DEPTH_STENCIL_DESC dsdesc = {
          /*DepthEnable =*/TRUE,
          /*DepthWriteMask =*/D3D11_DEPTH_WRITE_MASK_ALL,
          /*DepthFunc =*/D3D11_COMPARISON_LESS,
          /*StencilEnable =*/TRUE,
          /*StencilReadMask =*/D3D11_DEFAULT_STENCIL_READ_MASK,
          /*StencilWriteMask =*/D3D11_DEFAULT_STENCIL_WRITE_MASK,
          /*FrontFace =*/
          {D3D11_STENCIL_OP_INCR_SAT, D3D11_STENCIL_OP_INCR_SAT, D3D11_STENCIL_OP_INCR_SAT,
           D3D11_COMPARISON_GREATER_EQUAL},
          /*BackFace =*/
          {D3D11_STENCIL_OP_INCR_SAT, D3D11_STENCIL_OP_INCR_SAT, D3D11_STENCIL_OP_INCR_SAT,
           D3D11_COMPARISON_GREATER_EQUAL},
      };
      if(curDS)
      {
        D3D11_DEPTH_STENCIL_DESC stateDesc;
        curDS->GetDesc(&stateDesc);
        dsdesc.DepthFunc = stateDesc.DepthFunc;
      }

      if(history[h].preMod.depth < 0.0f)
        dsdesc.DepthEnable = FALSE;

      SAFE_RELEASE(curDS);

      m_pDevice->CreateDepthStencilState(&dsdesc, &ds);

      D3D11_RASTERIZER_DESC rd = rdesc;

      rd.ScissorEnable = TRUE;
      // leave depth clip mode as normal
      // leave backface culling mode as normal

      m_pDevice->CreateRasterizerState(&rd, &newRS);
      m_pImmediateContext->RSSetState(newRS);
      SAFE_RELEASE(newRS);

      for(int i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
        SAFE_RELEASE(RTVs[i]);

      m_pImmediateContext->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, RTVs, NULL);

      rtIndex = 100000;

      for(uint32_t i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
      {
        if(RTVs[i])
        {
          if(rtIndex == 100000)
          {
            ID3D11Resource *res = NULL;
            RTVs[i]->GetResource(&res);

            if(res == targetres)
              rtIndex = i;

            SAFE_RELEASE(res);
          }

          // leave the target RTV in the array
          if(rtIndex != i)
            SAFE_RELEASE(RTVs[i]);
        }
      }

      if(rtIndex == 100000)
      {
        rtIndex = 0;
        RDCWARN("Couldn't find target RT bound at this event");
      }
    }

    float cleardepth = RDCCLAMP(history[h].preMod.depth, 0.0f, 1.0f);

    m_pImmediateContext->ClearDepthStencilView(
        shaddepthOutputDSV, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, cleardepth, 0);

    m_pImmediateContext->OMSetDepthStencilState(ds, history[h].fragIndex);

    // if we're not the last modification in our event, need to fetch post fragment value
    if(h + 1 < history.size() && history[h].eventId == history[h + 1].eventId)
    {
      D3D11MarkerRegion middraw("fetching mid-action");

      m_pImmediateContext->OMSetRenderTargets(rtIndex + 1, RTVs, shaddepthOutputDSV);

      m_pDevice->ReplayLog(0, history[h].eventId, eReplay_OnlyDraw);

      GetDebugManager()->PixelHistoryCopyPixel(colourCopyParams, postColSlot, 0);
      postColSlot++;

      GetDebugManager()->PixelHistoryCopyPixel(depthCopyParams, depthSlot, 1);
    }

    // fetch shader output value & primitive ID
    {
      m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.StencIncrEqDepthState,
                                                  history[h].fragIndex);

      m_pImmediateContext->ClearDepthStencilView(
          shaddepthOutputDSV, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, cleardepth, 0);

      m_pImmediateContext->OMGetBlendState(&curBS, blendFactor, &curSample);

      m_pImmediateContext->OMSetBlendState(NULL, blendFactor, curSample);

      // fetch shader output value
      {
        D3D11MarkerRegion shadout("fetching shader-out");

        ID3D11RenderTargetView *sparseRTVs[8] = {0};
        sparseRTVs[rtIndex] = shadOutputRTV;
        m_pImmediateContext->OMSetRenderTargets(rtIndex + 1, sparseRTVs, shaddepthOutputDSV);

        m_pDevice->ReplayLog(0, history[h].eventId, eReplay_OnlyDraw);

        GetDebugManager()->PixelHistoryCopyPixel(shadoutCopyParams, shadColSlot, 0);

        m_pImmediateContext->OMSetRenderTargets(0, NULL, NULL);

        GetDebugManager()->PixelHistoryCopyPixel(depthCopyParams, depthSlot, 0);
      }

      m_pImmediateContext->ClearDepthStencilView(
          shaddepthOutputDSV, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, cleardepth, 0);

      // fetch primitive ID
      {
        D3D11MarkerRegion primid("fetching prim ID");

        m_pImmediateContext->OMSetRenderTargets(1, &shadOutputRTV, shaddepthOutputDSV);

        m_pImmediateContext->PSGetShader(&curPS, curInst, &curNumInst);
        m_pImmediateContext->PSSetShader(m_PixelHistory.PrimitiveIDPS, NULL, 0);

        if(curPS == NULL)
          history[h].unboundPS = true;

        m_pDevice->ReplayLog(0, history[h].eventId, eReplay_OnlyDraw);

        m_pImmediateContext->PSSetShader(curPS, curInst, curNumInst);

        for(UINT i = 0; i < curNumInst; i++)
          SAFE_RELEASE(curInst[i]);

        SAFE_RELEASE(curPS);

        GetDebugManager()->PixelHistoryCopyPixel(shadoutCopyParams, shadColSlot, 1);
      }

      shadColSlot++;
      depthSlot++;

      m_pImmediateContext->OMSetBlendState(curBS, blendFactor, curSample);
      SAFE_RELEASE(curBS);
    }
  }

  SAFE_RELEASE(ds);

  for(int i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
    SAFE_RELEASE(RTVs[i]);

  m_pImmediateContext->CopyResource(shadoutStoreReadback, shadoutStore);
  m_pImmediateContext->CopyResource(pixstoreReadback, pixstore);
  m_pImmediateContext->CopyResource(pixstoreDepthReadback, pixstoreDepth);

  D3D11_MAPPED_SUBRESOURCE mappedShadout = {0};
  m_pImmediateContext->Map(pixstoreReadback, 0, D3D11_MAP_READ, 0, &mapped);
  m_pImmediateContext->Map(pixstoreDepthReadback, 0, D3D11_MAP_READ, 0, &mappedDepth);
  m_pImmediateContext->Map(shadoutStoreReadback, 0, D3D11_MAP_READ, 0, &mappedShadout);

  byte *shadoutStoreData = (byte *)mappedShadout.pData;
  pixstoreData = (byte *)mapped.pData;
  pixstoreDepthData = (byte *)mappedDepth.pData;

  /////////////////////////////////////////////////////////////////////////
  // final loop to fetch the values from above into the modification events

  postColSlot = 0;
  shadColSlot = 0;
  depthSlot = 0;

  prev = 0;

  // this is used to track if any previous fragments in the current draw
  // discarded. If so, the shader output values will be off-by-one in the
  // shader output storage due to stencil counting errors, and we need to
  // offset.
  uint32_t discardedOffset = 0;

  ModificationValue lastKnownGood = {};
  if(!history.isEmpty())
    lastKnownGood = history[0].preMod;

  for(size_t h = 0; h < history.size(); h++)
  {
    const ActionDescription *action = m_pDevice->GetAction(history[h].eventId);

    if(action->flags & ActionFlags::Clear)
      continue;

    // reset discarded offset every event
    if(h > 0 && history[h].eventId != history[h - 1].eventId)
    {
      discardedOffset = 0;
      lastKnownGood = history[h].preMod;
    }

    // if we're not the last modification in our event, need to fetch post fragment value
    bool lastMod = true;
    if(h + 1 < history.size() && history[h].eventId == history[h + 1].eventId)
    {
      lastMod = false;
      // colour
      {
        uint32_t offsettedSlot = (postColSlot - discardedOffset);
        byte *data = pixstoreData + sizeof(Vec4f) * pixstoreStride * offsettedSlot;
        memcpy(&history[h].postMod.col.uintValue[0], data, sizeof(Vec4f));
      }

      {
        uint32_t offsettedSlot = (depthSlot - discardedOffset);
        RDCASSERT(discardedOffset <= depthSlot);
        // post fragment depth is in slot 1 of the depth
        float *depthdata = (float *)(pixstoreDepthData +
                                     sizeof(Vec4f) * pixstoreStride * offsettedSlot + sizeof(Vec4f));

        // this is not exactly the right value when the original depth was D16, it will be slightly
        // higher precision than the actual value but that's better than not having a value at all,
        // and allows us to identify fragments within a draw which fail the depth test.
        if(history[h].preMod.depth >= 0.0f)
          history[h].postMod.depth = *depthdata;

        // we can't retrieve stencil value after each fragment, as we use stencil to identify the
        // fragment
        if(history[h].preMod.stencil >= 0)
          history[h].postMod.stencil = -2;
        else
          history[h].postMod.stencil = -1;
      }

      // in each case we only mark as "unknown" when the depth/stencil isn't already known to be
      // unbound

      postColSlot++;
    }

    // if we're not the first modification in our event, set our preMod to the previous postMod
    if(h > 0 && history[h].eventId == history[h - 1].eventId)
    {
      history[h].preMod = history[h - 1].postMod;
    }

    // fetch shader output value
    {
      bool someFragsClipped = history[h].primitiveID != 0;

      // colour
      {
        // shader output is always 4 32bit components, so we can copy straight
        uint32_t offsettedSlot = (shadColSlot - discardedOffset);
        RDCASSERT(discardedOffset <= shadColSlot);

        byte *data = shadoutStoreData + sizeof(Vec4f) * pixstoreStride * offsettedSlot;

        memcpy(&history[h].shaderOut.col.uintValue[0], data, 4 * sizeof(float));

        // primitive ID is in slot 1 and ignores any discards
        data = shadoutStoreData + sizeof(Vec4f) * pixstoreStride * shadColSlot;
        memcpy(&history[h].primitiveID, data + sizeof(Vec4f), sizeof(uint32_t));
      }

      // depth
      {
        uint32_t offsettedSlot = (depthSlot - discardedOffset);
        RDCASSERT(discardedOffset <= depthSlot);

        float *data = (float *)(pixstoreDepthData + sizeof(Vec4f) * pixstoreStride * offsettedSlot);

        history[h].shaderOut.depth = data[0];
        if(history[h].postMod.stencil == -1)
          history[h].shaderOut.stencil = -1;
        else
          // can't retrieve this as we use stencil to identify each fragment
          history[h].shaderOut.stencil = -2;
      }

      // if some fragments clipped in this draw, we need to check to see if this
      // primitive ID was one of the ones that clipped.
      // Currently the way we do that is by drawing only that primitive
      // inside an occlusion query: a result of zero means the fragment discarded.
      if(someFragsClipped)
      {
        // don't need to worry about trashing state, since at this point we don't need to restore it
        // anymore
        if(prev != history[h].eventId)
        {
          m_pDevice->ReplayLog(0, history[h].eventId, eReplay_WithoutDraw);

          //////////////////////////////////////////////////////////////
          // Set up an identical raster state, but with scissor enabled.
          // This matches the setup when we were originally fetching the
          // number of fragments.
          m_pImmediateContext->RSGetState(&curRS);

          D3D11_RASTERIZER_DESC rsDesc = {
              /*FillMode =*/D3D11_FILL_SOLID,
              /*CullMode =*/D3D11_CULL_BACK,
              /*FrontCounterClockwise =*/FALSE,
              /*DepthBias =*/D3D11_DEFAULT_DEPTH_BIAS,
              /*DepthBiasClamp =*/D3D11_DEFAULT_DEPTH_BIAS_CLAMP,
              /*SlopeScaledDepthBias =*/D3D11_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
              /*DepthClipEnable =*/TRUE,
              /*ScissorEnable =*/FALSE,
              /*MultisampleEnable =*/FALSE,
              /*AntialiasedLineEnable =*/FALSE,
          };

          if(curRS)
            curRS->GetDesc(&rsDesc);

          SAFE_RELEASE(curRS);

          rsDesc.ScissorEnable = TRUE;

          // scissor to our pixel
          newScissors[0].left = LONG(x);
          newScissors[0].top = LONG(y);
          newScissors[0].right = newScissors[0].left + 1;
          newScissors[0].bottom = newScissors[0].top + 1;

          m_pImmediateContext->RSSetScissorRects(1, newScissors);

          m_pDevice->CreateRasterizerState(&rsDesc, &newRS);

          m_pImmediateContext->RSSetState(newRS);

          // other states can just be set to always pass, we already know this primitive ID renders
          m_pImmediateContext->OMSetBlendState(m_PixelHistory.NopBlendState, blendFactor, sampleMask);
          m_pImmediateContext->OMSetRenderTargets(0, NULL, shaddepthOutputDSV);
          m_pImmediateContext->OMSetDepthStencilState(m_PixelHistory.AllPassDepthState, 0);

          SAFE_RELEASE(newRS);
        }
        prev = history[h].eventId;

        m_pImmediateContext->ClearDepthStencilView(
            shaddepthOutputDSV, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 0.0f, 0);

        m_pImmediateContext->Begin(testQueries[0]);

        Topology topo =
            MakePrimitiveTopology(m_pImmediateContext->GetCurrentPipelineState()->IA.Topo);

        // do action
        if(action->flags & ActionFlags::Indexed)
        {
          // TODO once pixel history distinguishes between instances, draw only the instance for
          // this fragment
          m_pImmediateContext->DrawIndexedInstanced(
              RENDERDOC_NumVerticesPerPrimitive(topo), RDCMAX(1U, action->numInstances),
              action->indexOffset + RENDERDOC_VertexOffset(topo, history[h].primitiveID),
              action->baseVertex, action->instanceOffset);
        }
        else
        {
          m_pImmediateContext->DrawInstanced(
              RENDERDOC_NumVerticesPerPrimitive(topo), RDCMAX(1U, action->numInstances),
              action->vertexOffset + RENDERDOC_VertexOffset(topo, history[h].primitiveID),
              action->instanceOffset);
        }

        m_pImmediateContext->End(testQueries[0]);

        do
        {
          hr = m_pImmediateContext->GetData(testQueries[0], &occlData, sizeof(occlData), 0);
        } while(hr == S_FALSE);
        RDCASSERTEQUAL(hr, S_OK);

        if(occlData == 0)
        {
          history[h].shaderDiscarded = true;
          discardedOffset++;
          RDCEraseEl(history[h].shaderOut);
          history[h].shaderOut.depth = -1.0f;
          history[h].shaderOut.stencil = -1;
          if(!lastMod)
            history[h].postMod = lastKnownGood;
        }
        else
        {
          lastKnownGood = history[h].postMod;
        }
      }

      shadColSlot++;
      depthSlot++;
    }

    // check the depth value between premod/shaderout against the known test if we have valid depth
    // values, as we don't have per-fragment depth test information.
    if(history[h].preMod.depth >= 0.0f && history[h].shaderOut.depth >= 0.0f)
    {
      DXGI_FORMAT dfmt = depthFormats[history[h].eventId];
      float shadDepth = history[h].shaderOut.depth;

      // quantise depth to match before comparing
      if(dfmt == DXGI_FORMAT_D24_UNORM_S8_UINT || dfmt == DXGI_FORMAT_X24_TYPELESS_G8_UINT ||
         dfmt == DXGI_FORMAT_R24_UNORM_X8_TYPELESS || dfmt == DXGI_FORMAT_R24G8_TYPELESS)
      {
        shadDepth = float(uint32_t(float(shadDepth * 0xffffff))) / float(0xffffff);
      }
      else if(dfmt == DXGI_FORMAT_D16_UNORM || dfmt == DXGI_FORMAT_R16_TYPELESS ||
              dfmt == DXGI_FORMAT_R16_UNORM)
      {
        shadDepth = float(uint32_t(float(shadDepth * 0xffff))) / float(0xffff);
      }

      bool passed = true;
      if(depthOps[history[h].eventId] == D3D11_COMPARISON_EQUAL)
        passed = (shadDepth == history[h].preMod.depth);
      else if(depthOps[history[h].eventId] == D3D11_COMPARISON_NOT_EQUAL)
        passed = (shadDepth != history[h].preMod.depth);
      else if(depthOps[history[h].eventId] == D3D11_COMPARISON_LESS)
        passed = (shadDepth < history[h].preMod.depth);
      else if(depthOps[history[h].eventId] == D3D11_COMPARISON_LESS_EQUAL)
        passed = (shadDepth <= history[h].preMod.depth);
      else if(depthOps[history[h].eventId] == D3D11_COMPARISON_GREATER)
        passed = (shadDepth > history[h].preMod.depth);
      else if(depthOps[history[h].eventId] == D3D11_COMPARISON_GREATER_EQUAL)
        passed = (shadDepth >= history[h].preMod.depth);

      history[h].depthTestFailed = !passed;
    }
  }

  m_pImmediateContext->Unmap(shadoutStoreReadback, 0);
  m_pImmediateContext->Unmap(pixstoreReadback, 0);
  m_pImmediateContext->Unmap(pixstoreDepthReadback, 0);

  for(size_t i = 0; i < ARRAY_COUNT(testQueries); i++)
    SAFE_RELEASE(testQueries[i]);

  SAFE_RELEASE(pixstore);
  SAFE_RELEASE(shadoutStore);
  SAFE_RELEASE(pixstoreDepth);

  SAFE_RELEASE(pixstoreReadback);
  SAFE_RELEASE(shadoutStoreReadback);
  SAFE_RELEASE(pixstoreDepthReadback);

  SAFE_RELEASE(pixstoreUAV);
  SAFE_RELEASE(shadoutStoreUAV);
  SAFE_RELEASE(pixstoreDepthUAV);

  SAFE_RELEASE(shadOutput);
  SAFE_RELEASE(shadOutputSRV);
  SAFE_RELEASE(shadOutputRTV);
  SAFE_RELEASE(shaddepthOutput);
  SAFE_RELEASE(shaddepthOutputDSV);
  SAFE_RELEASE(shaddepthOutputDepthSRV);
  SAFE_RELEASE(shaddepthOutputStencilSRV);

  SAFE_RELEASE(depthCopyD24S8);
  SAFE_RELEASE(depthCopyD24S8_DepthSRV);
  SAFE_RELEASE(depthCopyD24S8_StencilSRV);

  SAFE_RELEASE(depthCopyD32S8);
  SAFE_RELEASE(depthCopyD32S8_DepthSRV);
  SAFE_RELEASE(depthCopyD32S8_StencilSRV);

  SAFE_RELEASE(depthCopyD32);
  SAFE_RELEASE(depthCopyD32_DepthSRV);

  SAFE_RELEASE(depthCopyD16);
  SAFE_RELEASE(depthCopyD16_DepthSRV);

  SAFE_RELEASE(srcxyCBuf);
  SAFE_RELEASE(shadoutsrcxyCBuf);
  SAFE_RELEASE(storeCBuf);

  return history;
}