in renderdoc/driver/vulkan/vk_replay.cpp [3686:4630]
// Read back one subresource of an image into a CPU-side byte buffer.
//
// tex    - ID of the image to read. Unknown IDs log an error and return.
// sub    - mip / slice / sample to fetch. Each is clamped to the image's valid range.
// params - controls the readback: 'resolve' (resolve a multisampled image), 'remap' (render into
//          an RGBA8/RGBA16/RGBA32 colour image first), 'typeCast', 'blackPoint'/'whitePoint'
//          (passed to the remap render as the display range) and 'standardLayout' (move alpha to
//          the high bits for 4444/5551 packed formats).
// data   - output. Resized and filled only once the readback succeeded; every early-out leaves it
//          untouched.
//
// Depending on the image and params one of four paths is taken:
//  * remap requested: the subresource is rendered into a temporary colour image, which is then
//    copied to the readback buffer.
//  * multisampled + resolve: the image is resolved into a temporary image, which is then copied.
//  * multisampled, no resolve: the sample is written straight to the readback buffer with
//    CopyTex2DMSToBuffer, no image->buffer copy is recorded.
//  * otherwise: the live image is copied directly to the readback buffer.
//
// NOTE(review): the early-outs after a failed Vulkan call return without releasing the temporaries
// created so far and without ending the command buffer being recorded. Releasing them at those
// points could invalidate command buffers that are still pending, so this is left as-is - confirm
// that is acceptable for these (fatal) error paths.
void VulkanReplay::GetTextureData(ResourceId tex, const Subresource &sub,
                                  const GetTextureDataParams &params, bytebuf &data)
{
  bool wasms = false;
  bool resolve = params.resolve;
  bool copyToBuffer = true;

  if(m_pDriver->m_CreationInfo.m_Image.find(tex) == m_pDriver->m_CreationInfo.m_Image.end())
  {
    RDCERR("Trying to get texture data for unknown ID %s!", ToStr(tex).c_str());
    return;
  }

  const VulkanCreationInfo::Image &imInfo = m_pDriver->m_CreationInfo.m_Image[tex];

  // nothing to read if the image has no tracked state or was never bound to memory
  LockedConstImageStateRef lockedImage = m_pDriver->FindConstImageState(tex);
  if(!lockedImage || !lockedImage->isMemoryBound)
    return;

  // srcImageState is re-pointed at tmpImageState if we end up reading from a temporary image
  const ImageState *srcImageState = &*lockedImage;
  ImageState tmpImageState;

  VkMarkerRegion region(StringFormat::Fmt("GetTextureData(%u, %u, %u, remap=%d)", sub.mip,
                                          sub.slice, sub.sample, params.remap));

  // clamp the requested subresource to what the image actually has
  Subresource s = sub;
  s.slice = RDCMIN(uint32_t(imInfo.arrayLayers - 1), s.slice);
  s.sample = RDCMIN(uint32_t(imInfo.samples - 1), s.sample);
  s.mip = RDCMIN(uint32_t(imInfo.mipLevels - 1), s.mip);

  // description of a temporary image matching the source. It is adjusted below depending on which
  // path is taken, and also doubles as the description of what ends up in the readback buffer.
  VkImageCreateInfo imCreateInfo = {
      VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      NULL,
      0,
      imInfo.type,
      imInfo.format,
      imInfo.extent,
      imInfo.mipLevels,
      imInfo.arrayLayers,
      imInfo.samples,
      VK_IMAGE_TILING_OPTIMAL,
      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
      VK_SHARING_MODE_EXCLUSIVE,
      0,
      NULL,
      VK_IMAGE_LAYOUT_UNDEFINED,
  };

  VkImageAspectFlags imageAspects = FormatImageAspects(imInfo.format);
  bool isDepth = (imageAspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
  bool isStencil = (imageAspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
  bool isPlanar = (imageAspects & VK_IMAGE_ASPECT_PLANE_0_BIT) != 0;
  uint32_t planeCount = GetYUVPlaneCount(imInfo.format);

  VkImage liveWrappedImage = GetResourceManager()->GetCurrentHandle<VkImage>(tex);
  VkImage srcImage = Unwrap(liveWrappedImage);

  // temporaries, released at the end of the function
  VkImage tmpImage = VK_NULL_HANDLE;
  VkImage wrappedTmpImage = VK_NULL_HANDLE;
  VkDeviceMemory tmpMemory = VK_NULL_HANDLE;

  VkFramebuffer *tmpFB = NULL;
  VkImageView *tmpView = NULL;
  uint32_t numFBs = 0;
  VkRenderPass tmpRP = VK_NULL_HANDLE;
  VkRenderPass tmpRPStencil = VK_NULL_HANDLE;

  VkDevice dev = m_pDriver->GetDev();
  VkCommandBuffer cmd = m_pDriver->GetNextCmd();
  const VkDevDispatchTable *vt = ObjDisp(dev);

  if(cmd == VK_NULL_HANDLE)
    return;

  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

  VkResult vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
  CheckVkResult(vkr);

  // dataSize is the number of bytes returned to the caller
  uint32_t dataSize = 0;
  VkBuffer readbackBuf = VK_NULL_HANDLE;
  VkDeviceMemory readbackMem = VK_NULL_HANDLE;

  if(imInfo.samples > 1)
  {
    // make image n-array instead of n-samples
    imCreateInfo.arrayLayers *= imCreateInfo.samples;
    imCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;

    wasms = true;
  }

  // multisampled depth/stencil images are never resolved here
  if(wasms && (isDepth || isStencil))
    resolve = false;

  if(params.remap != RemapTexture::NoRemap)
  {
    int renderFlags = 0;

    // pick the colour format to render into, based on the requested remap
    if(params.remap == RemapTexture::RGBA8)
    {
      // force readback texture to RGBA8 unorm, keeping sRGB-ness of the source
      if(IsSRGBFormat(imCreateInfo.format))
      {
        imCreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
        renderFlags |= eTexDisplay_RemapSRGB;
      }
      else
      {
        imCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
      }
    }
    else if(params.remap == RemapTexture::RGBA16)
    {
      imCreateInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
      renderFlags = eTexDisplay_16Render;
    }
    else if(params.remap == RemapTexture::RGBA32)
    {
      imCreateInfo.format = VK_FORMAT_R32G32B32A32_SFLOAT;
      renderFlags = eTexDisplay_32Render;
    }
    else
    {
      RDCERR("Unsupported remap format: %u", params.remap);
    }

    imCreateInfo.format = GetViewCastedFormat(imCreateInfo.format, BaseRemapType(params));

    if(IsUIntFormat(imCreateInfo.format))
      renderFlags |= eTexDisplay_RemapUInt;
    else if(IsSIntFormat(imCreateInfo.format))
      renderFlags |= eTexDisplay_RemapSInt;
    else
      renderFlags |= eTexDisplay_RemapFloat;

    // force to 1 array slice, 1 mip
    imCreateInfo.arrayLayers = 1;
    imCreateInfo.mipLevels = 1;
    // force to 2D
    imCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    imCreateInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;

    // we'll need to cast to remap the stencil part
    if(IsStencilFormat(imInfo.format))
      imCreateInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;

    // the temporary image only holds the selected mip
    imCreateInfo.extent.width = RDCMAX(1U, imCreateInfo.extent.width >> s.mip);
    imCreateInfo.extent.height = RDCMAX(1U, imCreateInfo.extent.height >> s.mip);
    imCreateInfo.extent.depth = RDCMAX(1U, imCreateInfo.extent.depth >> s.mip);

    // convert a 3D texture into a 2D array, so we can render to the slices without needing
    // KHR_maintenance1
    if(imCreateInfo.extent.depth > 1)
    {
      imCreateInfo.arrayLayers = imCreateInfo.extent.depth;
      imCreateInfo.extent.depth = 1;
    }

    // create render texture similar to readback texture
    vkr = vt->CreateImage(Unwrap(dev), &imCreateInfo, NULL, &tmpImage);
    CheckVkResult(vkr);
    if(vkr != VK_SUCCESS)
      return;

    wrappedTmpImage = tmpImage;
    GetResourceManager()->WrapResource(Unwrap(dev), wrappedTmpImage);
    tmpImageState = ImageState(wrappedTmpImage, ImageInfo(imCreateInfo), eFrameRef_None);

    NameVulkanObject(wrappedTmpImage, "GetTextureData tmpImage");

    VkMemoryRequirements mrq = {0};
    vt->GetImageMemoryRequirements(Unwrap(dev), tmpImage, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        NULL,
        mrq.size,
        m_pDriver->GetGPULocalMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = vt->AllocateMemory(Unwrap(dev), &allocInfo, NULL, &tmpMemory);
    CheckVkResult(vkr);

    if(vkr != VK_SUCCESS)
      return;

    vkr = vt->BindImageMemory(Unwrap(dev), tmpImage, tmpMemory, 0);
    CheckVkResult(vkr);

    tmpImageState.InlineTransition(
        cmd, m_pDriver->m_QueueFamilyIdx, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 0,
        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, m_pDriver->GetImageTransitionInfo());

    // end this command buffer, the rendertexture below will use its own and we want to ensure
    // ordering
    vkr = vt->EndCommandBuffer(Unwrap(cmd));
    CheckVkResult(vkr);

    if(Vulkan_Debug_SingleSubmitFlushing())
      m_pDriver->SubmitCmds();

    // create framebuffer/render pass to render to
    VkAttachmentDescription attDesc = {0,
                                       imCreateInfo.format,
                                       VK_SAMPLE_COUNT_1_BIT,
                                       VK_ATTACHMENT_LOAD_OP_LOAD,
                                       VK_ATTACHMENT_STORE_OP_STORE,
                                       VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                                       VK_ATTACHMENT_STORE_OP_DONT_CARE,
                                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};

    VkAttachmentReference attRef = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};

    VkSubpassDescription subpass = {
        0,    VK_PIPELINE_BIND_POINT_GRAPHICS,
        0,    NULL,       // inputs
        1,    &attRef,    // color
        NULL,             // resolve
        NULL,             // depth-stencil
        0,    NULL,       // preserve
    };

    VkRenderPassCreateInfo rpinfo = {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        NULL,
        0,
        1,
        &attDesc,
        1,
        &subpass,
        0,
        NULL,    // dependencies
    };
    vkr = vt->CreateRenderPass(Unwrap(dev), &rpinfo, NULL, &tmpRP);
    CheckVkResult(vkr);

    numFBs = imCreateInfo.arrayLayers;

    // we'll need twice as many temp views/FBs for stencil views. The arrays are value-initialised
    // so that any slot which is never filled in is VK_NULL_HANDLE when the cleanup destroys it.
    if(IsStencilFormat(imInfo.format))
    {
      tmpFB = new VkFramebuffer[numFBs * 2]();
      tmpView = new VkImageView[numFBs * 2]();
    }
    else
    {
      tmpFB = new VkFramebuffer[numFBs]();
      tmpView = new VkImageView[numFBs]();
    }

    // the debug dimensions are temporarily overridden for the renders, and restored afterwards
    int oldW = m_DebugWidth, oldH = m_DebugHeight;

    m_DebugWidth = imCreateInfo.extent.width;
    m_DebugHeight = imCreateInfo.extent.height;

    int renderCount = 0;

    // if 3d texture, render each slice separately, otherwise render once
    for(uint32_t i = 0; i < numFBs; i++)
    {
      // when rendering many slices, submit and flush each time the render count wraps around the
      // texture-render UBO ring count
      if(numFBs > 1 && (renderCount % m_TexRender.UBO.GetRingCount()) == 0)
      {
        m_pDriver->SubmitCmds();
        m_pDriver->FlushQ();
      }

      TextureDisplay texDisplay;

      texDisplay.red = texDisplay.green = texDisplay.blue = texDisplay.alpha = true;
      texDisplay.hdrMultiplier = -1.0f;
      texDisplay.linearDisplayAsGamma = false;
      texDisplay.overlay = DebugOverlay::NoOverlay;
      texDisplay.flipY = false;
      texDisplay.subresource.mip = s.mip;
      texDisplay.subresource.slice = imInfo.type == VK_IMAGE_TYPE_3D ? i : s.slice;
      texDisplay.subresource.sample =
          imInfo.type == VK_IMAGE_TYPE_3D ? 0 : (resolve ? ~0U : s.sample);
      texDisplay.customShaderId = ResourceId();
      texDisplay.rangeMin = params.blackPoint;
      texDisplay.rangeMax = params.whitePoint;
      texDisplay.scale = 1.0f;
      texDisplay.resourceId = tex;
      texDisplay.typeCast = params.typeCast;
      texDisplay.rawOutput = false;
      texDisplay.xOffset = 0;
      texDisplay.yOffset = 0;

      // view of layer i of the temporary image
      VkImageViewCreateInfo viewInfo = {
          VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
          NULL,
          0,
          tmpImage,
          VK_IMAGE_VIEW_TYPE_2D,
          imCreateInfo.format,
          {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
           VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
          {
              VK_IMAGE_ASPECT_COLOR_BIT,
              0,
              VK_REMAINING_MIP_LEVELS,
              i,
              1,
          },
      };

      vkr = vt->CreateImageView(Unwrap(dev), &viewInfo, NULL, &tmpView[i]);
      CheckVkResult(vkr);
      NameUnwrappedVulkanObject(tmpView[i], "GetTextureData tmpView[i]");

      VkFramebufferCreateInfo fbinfo = {
          VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
          NULL,
          0,
          tmpRP,
          1,
          &tmpView[i],
          (uint32_t)imCreateInfo.extent.width,
          (uint32_t)imCreateInfo.extent.height,
          1,
      };

      vkr = vt->CreateFramebuffer(Unwrap(dev), &fbinfo, NULL, &tmpFB[i]);
      CheckVkResult(vkr);

      VkClearValue clearval = {};
      VkRenderPassBeginInfo rpbegin = {
          VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
          NULL,
          tmpRP,
          tmpFB[i],
          {{
               0,
               0,
           },
           {imCreateInfo.extent.width, imCreateInfo.extent.height}},
          1,
          &clearval,
      };

      RenderTextureInternal(texDisplay, *srcImageState, rpbegin, renderFlags);
      renderCount++;

      // for textures with stencil, do another draw to copy the stencil
      if(isStencil)
      {
        viewInfo.format = GetViewCastedFormat(viewInfo.format, CompType::UInt);

        // the stencil render pass only depends on the uint-cast format, which is the same for
        // every layer. Create it once so that later iterations don't overwrite (and leak) it.
        if(tmpRPStencil == VK_NULL_HANDLE)
        {
          attDesc.format = viewInfo.format;
          vkr = vt->CreateRenderPass(Unwrap(dev), &rpinfo, NULL, &tmpRPStencil);
          CheckVkResult(vkr);
        }

        fbinfo.renderPass = tmpRPStencil;
        rpbegin.renderPass = tmpRPStencil;

        // stencil views/framebuffers live in the second half of the arrays
        vkr = vt->CreateImageView(Unwrap(dev), &viewInfo, NULL, &tmpView[i + numFBs]);
        CheckVkResult(vkr);
        NameUnwrappedVulkanObject(tmpView[i + numFBs], "GetTextureData tmpView[i]");
        fbinfo.pAttachments = &tmpView[i + numFBs];

        vkr = vt->CreateFramebuffer(Unwrap(dev), &fbinfo, NULL, &tmpFB[i + numFBs]);
        CheckVkResult(vkr);
        rpbegin.framebuffer = tmpFB[i + numFBs];

        // stencil is rendered as uint into green only
        int stencilFlags = renderFlags;
        stencilFlags &= ~eTexDisplay_RemapFloat;
        stencilFlags &= ~eTexDisplay_RemapSRGB;
        stencilFlags |= eTexDisplay_RemapUInt | eTexDisplay_GreenOnly;

        texDisplay.red = texDisplay.blue = texDisplay.alpha = false;

        // S8 renders into red
        if(IsStencilOnlyFormat(imInfo.format))
        {
          texDisplay.red = true;
          texDisplay.green = false;
          stencilFlags &= ~eTexDisplay_GreenOnly;
        }

        RenderTextureInternal(texDisplay, *srcImageState, rpbegin, stencilFlags);
        renderCount++;
      }
    }

    m_DebugWidth = oldW;
    m_DebugHeight = oldH;

    // from here on we read from the rendered temporary image
    srcImage = tmpImage;
    srcImageState = &tmpImageState;

    // fetch a new command buffer for copy & readback
    cmd = m_pDriver->GetNextCmd();

    if(cmd == VK_NULL_HANDLE)
      return;

    vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
    CheckVkResult(vkr);

    tmpImageState.InlineTransition(cmd, m_pDriver->m_QueueFamilyIdx,
                                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                   VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                                   VK_ACCESS_TRANSFER_READ_BIT, m_pDriver->GetImageTransitionInfo());

    // these have already been selected, don't need to fetch that subresource
    // when copying back to readback buffer
    s.slice = 0;
    s.mip = 0;

    // no longer depth, if it was
    isDepth = false;
    isStencil = false;
  }
  else if(wasms && resolve)
  {
    // force to 1 array slice, 1 mip
    imCreateInfo.arrayLayers = 1;
    imCreateInfo.mipLevels = 1;

    imCreateInfo.extent.width = RDCMAX(1U, imCreateInfo.extent.width >> s.mip);
    imCreateInfo.extent.height = RDCMAX(1U, imCreateInfo.extent.height >> s.mip);

    // create resolve texture
    vkr = vt->CreateImage(Unwrap(dev), &imCreateInfo, NULL, &tmpImage);
    CheckVkResult(vkr);
    if(vkr != VK_SUCCESS)
      return;

    wrappedTmpImage = tmpImage;
    GetResourceManager()->WrapResource(Unwrap(dev), wrappedTmpImage);
    tmpImageState = ImageState(wrappedTmpImage, ImageInfo(imCreateInfo), eFrameRef_None);

    NameVulkanObject(wrappedTmpImage, "GetTextureData tmpImage");

    VkMemoryRequirements mrq = {0};
    vt->GetImageMemoryRequirements(Unwrap(dev), tmpImage, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        NULL,
        mrq.size,
        m_pDriver->GetGPULocalMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = vt->AllocateMemory(Unwrap(dev), &allocInfo, NULL, &tmpMemory);
    CheckVkResult(vkr);

    if(vkr != VK_SUCCESS)
      return;

    vkr = vt->BindImageMemory(Unwrap(dev), tmpImage, tmpMemory, 0);
    CheckVkResult(vkr);

    // resolve was forced off above for multisampled depth/stencil
    RDCASSERT(!isDepth && !isStencil);

    VkImageResolve resolveRegion = {
        {VK_IMAGE_ASPECT_COLOR_BIT, s.mip, s.slice, 1},
        {0, 0, 0},
        {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
        {0, 0, 0},
        imCreateInfo.extent,
    };

    tmpImageState.InlineTransition(
        cmd, m_pDriver->m_QueueFamilyIdx, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0,
        VK_ACCESS_TRANSFER_WRITE_BIT, m_pDriver->GetImageTransitionInfo());

    // temporarily move the live image to a transfer-source layout; cleanupBarriers puts it back
    ImageBarrierSequence setupBarriers, cleanupBarriers;
    srcImageState->TempTransition(m_pDriver->m_QueueFamilyIdx, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                  VK_ACCESS_TRANSFER_READ_BIT, setupBarriers, cleanupBarriers,
                                  m_pDriver->GetImageTransitionInfo());
    m_pDriver->InlineSetupImageBarriers(cmd, setupBarriers);
    m_pDriver->SubmitAndFlushImageStateBarriers(setupBarriers);

    // resolve from live texture to resolve texture
    vt->CmdResolveImage(Unwrap(cmd), srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tmpImage,
                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &resolveRegion);

    tmpImageState.InlineTransition(cmd, m_pDriver->m_QueueFamilyIdx,
                                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT,
                                   VK_ACCESS_TRANSFER_READ_BIT, m_pDriver->GetImageTransitionInfo());

    m_pDriver->InlineCleanupImageBarriers(cmd, cleanupBarriers);
    if(!cleanupBarriers.empty())
    {
      // ensure this resolve happens before handing back the source image to the original queue
      vkr = vt->EndCommandBuffer(Unwrap(cmd));
      CheckVkResult(vkr);

      m_pDriver->SubmitCmds();
      m_pDriver->FlushQ();

      m_pDriver->SubmitAndFlushImageStateBarriers(cleanupBarriers);

      // fetch a new command buffer for remaining work
      cmd = m_pDriver->GetNextCmd();

      if(cmd == VK_NULL_HANDLE)
        return;

      vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
      CheckVkResult(vkr);
    }

    // from here on we read from the resolved temporary image
    srcImageState = &tmpImageState;
    srcImage = tmpImage;

    // these have already been selected, don't need to fetch that subresource
    // when copying back to readback buffer
    s.slice = 0;
    s.mip = 0;
  }
  else if(wasms)
  {
    dataSize = GetByteSize(imInfo.extent.width, imInfo.extent.height, imInfo.extent.depth,
                           imCreateInfo.format, s.mip);

    // buffer size needs to be align to the int for shader writing
    VkBufferCreateInfo bufInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        NULL,
        0,
        AlignUp(dataSize, 4U),
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
            VK_BUFFER_USAGE_TRANSFER_DST_BIT,
    };

    vkr = vt->CreateBuffer(Unwrap(dev), &bufInfo, NULL, &readbackBuf);
    CheckVkResult(vkr);

    VkMemoryRequirements mrq = {0};
    vt->GetBufferMemoryRequirements(Unwrap(dev), readbackBuf, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        NULL,
        mrq.size,
        m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = vt->AllocateMemory(Unwrap(dev), &allocInfo, NULL, &readbackMem);
    CheckVkResult(vkr);

    if(vkr != VK_SUCCESS)
      return;

    vkr = vt->BindBufferMemory(Unwrap(dev), readbackBuf, readbackMem, 0);
    CheckVkResult(vkr);

    // copy/expand multisampled live texture to readback buffer
    ImageBarrierSequence setupBarriers, cleanupBarriers;
    srcImageState->TempTransition(m_pDriver->m_QueueFamilyIdx,
                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                  VK_ACCESS_SHADER_READ_BIT, setupBarriers, cleanupBarriers,
                                  m_pDriver->GetImageTransitionInfo());
    m_pDriver->InlineSetupImageBarriers(cmd, setupBarriers);
    m_pDriver->SubmitAndFlushImageStateBarriers(setupBarriers);

    GetDebugManager()->CopyTex2DMSToBuffer(cmd, readbackBuf, srcImage, imCreateInfo.extent, s.slice,
                                           1, s.sample, 1, imCreateInfo.format);

    m_pDriver->InlineCleanupImageBarriers(cmd, cleanupBarriers);
    if(!cleanupBarriers.empty())
    {
      // ensure this copy happens before handing back the source image to the original queue
      vkr = vt->EndCommandBuffer(Unwrap(cmd));
      CheckVkResult(vkr);

      m_pDriver->SubmitCmds();
      m_pDriver->FlushQ();

      m_pDriver->SubmitAndFlushImageStateBarriers(cleanupBarriers);

      // fetch a new command buffer for remaining work
      cmd = m_pDriver->GetNextCmd();

      if(cmd == VK_NULL_HANDLE)
        return;

      vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
      CheckVkResult(vkr);
    }

    // readback buffer has already been populated, no need to call CmdCopyImageToBuffer
    copyToBuffer = false;
  }

  // byte offset in the readback buffer at which the stencil aspect is copied, for combined
  // depth-stencil images. This must match the offset used for the GPU copy.
  VkDeviceSize stencilOffset = 0;

  if(copyToBuffer)
  {
    ImageBarrierSequence cleanupBarriers;

    // if we have no tmpImage, we're copying directly from the real image
    if(tmpImage == VK_NULL_HANDLE)
    {
      ImageBarrierSequence setupBarriers;
      srcImageState->TempTransition(m_pDriver->m_QueueFamilyIdx, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                    VK_ACCESS_TRANSFER_READ_BIT, setupBarriers, cleanupBarriers,
                                    m_pDriver->GetImageTransitionInfo());
      m_pDriver->InlineSetupImageBarriers(cmd, setupBarriers);
      m_pDriver->SubmitAndFlushImageStateBarriers(setupBarriers);
    }

    rdcarray<VkBufferImageCopy> copyregions;

    // the aspect mask (and for some formats the offset/extent) is filled in per region below
    VkBufferImageCopy copyRegionTemplate = {
        0,
        0,
        0,
        {VK_IMAGE_ASPECT_NONE, s.mip, s.slice, 1},
        {
            0,
            0,
            0,
        },
        {RDCMAX(1U, imCreateInfo.extent.width >> s.mip),
         RDCMAX(1U, imCreateInfo.extent.height >> s.mip),
         RDCMAX(1U, imCreateInfo.extent.depth >> s.mip)},
    };

    if(isDepth || isStencil)
    {
      // depth and stencil are copied as separate regions: depth first, then stencil after it
      if(isDepth)
      {
        copyRegionTemplate.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
        copyregions.push_back(copyRegionTemplate);

        // Stencil offset (if present): just after the depth data, aligned up to 4 bytes
        copyRegionTemplate.bufferOffset =
            GetByteSize(imInfo.extent.width, imInfo.extent.height, imInfo.extent.depth,
                        GetDepthOnlyFormat(imCreateInfo.format), s.mip);

        copyRegionTemplate.bufferOffset = AlignUp(copyRegionTemplate.bufferOffset, (VkDeviceSize)4);

        // remember the *aligned* offset - this is where the copy puts the stencil data, so it's
        // where the CPU-side interleave below has to read it from
        stencilOffset = copyRegionTemplate.bufferOffset;
      }

      if(isStencil)
      {
        copyRegionTemplate.imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
        copyregions.push_back(copyRegionTemplate);
      }
    }
    else if(isPlanar)
    {
      // one region per plane, packed one after another in the buffer
      for(uint32_t i = 0; i < planeCount; i++)
      {
        copyRegionTemplate.imageSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT << i;

        VkExtent2D planeExtent =
            GetPlaneShape(RDCMAX(1U, imCreateInfo.extent.width >> s.mip),
                          RDCMAX(1U, imCreateInfo.extent.height >> s.mip), imCreateInfo.format, i);
        copyRegionTemplate.imageExtent.width = planeExtent.width;
        copyRegionTemplate.imageExtent.height = planeExtent.height;

        copyregions.push_back(copyRegionTemplate);

        copyRegionTemplate.bufferOffset +=
            GetPlaneByteSize(imCreateInfo.extent.width, imCreateInfo.extent.height,
                             imCreateInfo.extent.depth, imCreateInfo.format, s.mip, i);
      }
    }
    else
    {
      copyRegionTemplate.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
      copyregions.push_back(copyRegionTemplate);
    }

    dataSize = GetByteSize(imInfo.extent.width, imInfo.extent.height, imInfo.extent.depth,
                           imCreateInfo.format, s.mip);

    if(imCreateInfo.format == VK_FORMAT_D24_UNORM_S8_UINT)
    {
      // for most combined depth-stencil images this will be large enough for both to be copied
      // separately, but for D24S8 we need to add extra space since they won't be copied packed
      dataSize = AlignUp(dataSize, 4U);
      dataSize += GetByteSize(imInfo.extent.width, imInfo.extent.height, imInfo.extent.depth,
                              VK_FORMAT_S8_UINT, s.mip);
    }

    // the buffer must be able to hold every copy region. When depth and stencil are copied
    // separately the stencil region ends at stencilOffset + stencil size, which for very small
    // images can be past dataSize because of the 4-byte alignment of the stencil offset.
    VkDeviceSize readbackSize = dataSize;
    if(isDepth && isStencil)
    {
      const VkDeviceSize stencilEnd =
          stencilOffset + (VkDeviceSize)GetByteSize(imInfo.extent.width, imInfo.extent.height,
                                                    imInfo.extent.depth, VK_FORMAT_S8_UINT, s.mip);
      readbackSize = RDCMAX(readbackSize, stencilEnd);
    }

    VkBufferCreateInfo bufInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        NULL,
        0,
        readbackSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
    };

    vkr = vt->CreateBuffer(Unwrap(dev), &bufInfo, NULL, &readbackBuf);
    CheckVkResult(vkr);

    VkMemoryRequirements mrq = {0};
    vt->GetBufferMemoryRequirements(Unwrap(dev), readbackBuf, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        NULL,
        mrq.size,
        m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = vt->AllocateMemory(Unwrap(dev), &allocInfo, NULL, &readbackMem);
    CheckVkResult(vkr);

    if(vkr != VK_SUCCESS)
      return;

    vkr = vt->BindBufferMemory(Unwrap(dev), readbackBuf, readbackMem, 0);
    CheckVkResult(vkr);

    if(imInfo.type == VK_IMAGE_TYPE_3D && params.remap != RemapTexture::NoRemap)
    {
      // copy in each slice from the 2D array we created to render out the 3D texture
      for(uint32_t i = 0; i < imCreateInfo.arrayLayers; i++)
      {
        copyregions[0].imageSubresource.baseArrayLayer = i;
        copyregions[0].bufferOffset =
            i * GetByteSize(imCreateInfo.extent.width, imCreateInfo.extent.height, 1,
                            imCreateInfo.format, s.mip);
        vt->CmdCopyImageToBuffer(Unwrap(cmd), srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                 readbackBuf, (uint32_t)copyregions.size(), copyregions.data());
      }
    }
    else
    {
      // a 3D image is copied as a whole from layer 0, whatever slice was requested
      if(imInfo.type == VK_IMAGE_TYPE_3D)
        copyregions[0].imageSubresource.baseArrayLayer = 0;

      // copy from desired subresource in srcImage to buffer
      vt->CmdCopyImageToBuffer(Unwrap(cmd), srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                               readbackBuf, (uint32_t)copyregions.size(), copyregions.data());
    }

    // if we have no tmpImage, we're copying directly from the real image
    if(tmpImage == VK_NULL_HANDLE)
    {
      m_pDriver->InlineCleanupImageBarriers(cmd, cleanupBarriers);
      if(!cleanupBarriers.empty())
      {
        // ensure this copy happens before handing back the source image to the original queue
        vkr = vt->EndCommandBuffer(Unwrap(cmd));
        CheckVkResult(vkr);

        m_pDriver->SubmitCmds();
        m_pDriver->FlushQ();

        m_pDriver->SubmitAndFlushImageStateBarriers(cleanupBarriers);

        // fetch a new command buffer for remaining work
        cmd = m_pDriver->GetNextCmd();

        if(cmd == VK_NULL_HANDLE)
          return;

        vkr = vt->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
        CheckVkResult(vkr);
      }
    }
  }

  // the barrier covers the whole buffer, as the buffer can be larger than dataSize
  VkBufferMemoryBarrier bufBarrier = {
      VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      NULL,
      VK_ACCESS_TRANSFER_WRITE_BIT,
      VK_ACCESS_HOST_READ_BIT,
      VK_QUEUE_FAMILY_IGNORED,
      VK_QUEUE_FAMILY_IGNORED,
      readbackBuf,
      0,
      VK_WHOLE_SIZE,
  };

  // wait for copy to finish before reading back to host
  DoPipelineBarrier(cmd, 1, &bufBarrier);

  vkr = vt->EndCommandBuffer(Unwrap(cmd));
  CheckVkResult(vkr);

  m_pDriver->SubmitCmds();
  m_pDriver->FlushQ();

  // map the buffer and copy to return buffer
  byte *pData = NULL;
  vkr = vt->MapMemory(Unwrap(dev), readbackMem, 0, VK_WHOLE_SIZE, 0, (void **)&pData);
  CheckVkResult(vkr);
  if(vkr != VK_SUCCESS)
    return;
  if(!pData)
  {
    RDCERR("Manually reporting failed memory map");
    CheckVkResult(VK_ERROR_MEMORY_MAP_FAILED);
    return;
  }

  VkMappedMemoryRange range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, readbackMem, 0, VK_WHOLE_SIZE,
  };

  vkr = vt->InvalidateMappedMemoryRanges(Unwrap(dev), 1, &range);
  CheckVkResult(vkr);

  data.resize(dataSize);

  if(params.remap == RemapTexture::RGBA32 && IsDepthAndStencilFormat(imInfo.format))
  {
    memcpy(data.data(), pData, dataSize);

    // the green channel was rendered as a uint (stencil), rewrite it as a float in 0..1
    Vec4f *output = (Vec4f *)data.data();
    Vec4u *input = (Vec4u *)pData;

    for(size_t i = 0; i < dataSize / sizeof(Vec4u); i++)
      output[i].y = float(input[i].y) / 255.0f;
  }
  else if(isDepth && isStencil && copyToBuffer)
  {
    // We only need to manually interleave if we use CmdCopyImageToBuffer.
    // CopyDepthTex2DMS2Buffer will produce interleaved results.
    //
    // depth and stencil were copied to separate regions of the buffer, so we need to manually
    // copy to interleave pixels
    size_t pixelCount = size_t(RDCMAX(1U, imCreateInfo.extent.width >> s.mip)) *
                        RDCMAX(1U, imCreateInfo.extent.height >> s.mip) *
                        RDCMAX(1U, imCreateInfo.extent.depth >> s.mip);

    // for some reason reading direct from mapped memory here is *super* slow on android (1.5s to
    // iterate over the image), so we memcpy to a temporary buffer.
    rdcarray<byte> tmp;
    tmp.resize((size_t)stencilOffset + pixelCount * sizeof(uint8_t));
    memcpy(tmp.data(), pData, tmp.size());

    if(imCreateInfo.format == VK_FORMAT_D16_UNORM_S8_UINT)
    {
      uint16_t *dSrc = (uint16_t *)tmp.data();
      uint8_t *sSrc = (uint8_t *)(tmp.data() + stencilOffset);

      uint16_t *dDst = (uint16_t *)data.data();
      uint16_t *sDst = dDst + 1;    // interleaved, next pixel

      for(size_t i = 0; i < pixelCount; i++)
      {
        *dDst = *dSrc;
        *sDst = *sSrc;

        // increment source pointers by 1 since they're separate, and dest pointers by 2 since
        // they're interleaved
        dDst += 2;
        sDst += 2;

        sSrc++;
        dSrc++;
      }
    }
    else if(imCreateInfo.format == VK_FORMAT_D24_UNORM_S8_UINT)
    {
      // we can copy the depth from D24 as a 32-bit integer, since the remaining bits are garbage
      // and we overwrite them with stencil
      uint32_t *dSrc = (uint32_t *)tmp.data();
      uint8_t *sSrc = (uint8_t *)(tmp.data() + stencilOffset);

      uint32_t *dst = (uint32_t *)data.data();

      for(size_t i = 0; i < pixelCount; i++)
      {
        // pack the data together again, stencil in top bits
        *dst = (*dSrc & 0x00ffffff) | (uint32_t(*sSrc) << 24);

        dst++;
        sSrc++;
        dSrc++;
      }
    }
    else
    {
      // remaining case: 32-bit depth words, each followed by a 32-bit word holding the stencil
      uint32_t *dSrc = (uint32_t *)tmp.data();
      uint8_t *sSrc = (uint8_t *)(tmp.data() + stencilOffset);

      uint32_t *dDst = (uint32_t *)data.data();
      uint32_t *sDst = dDst + 1;    // interleaved, next pixel

      for(size_t i = 0; i < pixelCount; i++)
      {
        *dDst = *dSrc;
        *sDst = *sSrc;

        // increment source pointers by 1 since they're separate, and dest pointers by 2 since
        // they're interleaved
        dDst += 2;
        sDst += 2;

        sSrc++;
        dSrc++;
      }
    }
  }
  else
  {
    memcpy(data.data(), pData, dataSize);

    // vulkan's bitpacking of some layouts puts alpha in the low bits, which is not our 'standard'
    // layout and is not representable in our resource formats
    if(params.standardLayout)
    {
      if(imCreateInfo.format == VK_FORMAT_R4G4B4A4_UNORM_PACK16 ||
         imCreateInfo.format == VK_FORMAT_B4G4R4A4_UNORM_PACK16)
      {
        // rotate each 16-bit pixel right by 4, moving the low 4 bits (alpha) to the top
        uint16_t *ptr = (uint16_t *)data.data();

        for(uint32_t i = 0; i < dataSize; i += sizeof(uint16_t))
        {
          const uint16_t val = *ptr;
          *ptr = (val >> 4) | ((val & 0xf) << 12);
          ptr++;
        }
      }
      else if(imCreateInfo.format == VK_FORMAT_R5G5B5A1_UNORM_PACK16 ||
              imCreateInfo.format == VK_FORMAT_B5G5R5A1_UNORM_PACK16)
      {
        // rotate each 16-bit pixel right by 1, moving the low bit (alpha) to the top
        uint16_t *ptr = (uint16_t *)data.data();

        for(uint32_t i = 0; i < dataSize; i += sizeof(uint16_t))
        {
          const uint16_t val = *ptr;
          *ptr = (val >> 1) | ((val & 0x1) << 15);
          ptr++;
        }
      }
    }
  }

  vt->UnmapMemory(Unwrap(dev), readbackMem);

  // clean up temporary objects
  vt->DestroyBuffer(Unwrap(dev), readbackBuf, NULL);
  vt->FreeMemory(Unwrap(dev), readbackMem, NULL);

  if(tmpImage != VK_NULL_HANDLE)
  {
    GetResourceManager()->ReleaseWrappedResource(wrappedTmpImage, true);
    vt->DestroyImage(Unwrap(dev), tmpImage, NULL);
    vt->FreeMemory(Unwrap(dev), tmpMemory, NULL);
  }

  if(tmpFB != NULL)
  {
    // stencil formats allocated twice as many views/framebuffers
    if(IsStencilFormat(imInfo.format))
      numFBs *= 2;

    for(uint32_t i = 0; i < numFBs; i++)
    {
      vt->DestroyFramebuffer(Unwrap(dev), tmpFB[i], NULL);
      vt->DestroyImageView(Unwrap(dev), tmpView[i], NULL);
    }
    delete[] tmpFB;
    delete[] tmpView;
    vt->DestroyRenderPass(Unwrap(dev), tmpRP, NULL);
    vt->DestroyRenderPass(Unwrap(dev), tmpRPStencil, NULL);
  }
}