ResultDetails ReplayController::SaveTexture()

in renderdoc/replay/replay_controller.cpp [600:1460]


// Saves a texture, or a selection of its subresources, to the file at 'path' in the format
// requested by saveData.destType.
//
// The work happens in these stages:
//  1. The request is copied and sanitised (sample/mip/slice indices clamped, options that the
//     texture or destination format can't honour are switched off).
//  2. The set of subresources to read back is worked out. Multisampled textures are treated as
//     arrays with one 'slice' per sample, laid out as
//     [slice0 sample0], [slice0 sample1], [slice1 sample0], ...
//  3. A remap ('downcast') to RGBA8 or RGBA32 is selected when the destination can't store the
//     source format directly.
//  4. Each subresource is fetched with GetTextureData() and stored in 'subdata' as an owning
//     new[]'d buffer. 3D textures are split into one buffer per depth slice.
//  5. Optional CPU-side processing: slice grid, cube cruciform, channel extraction, alpha
//     removal for BMP/JPG, and 2-component expansion.
//  6. The result is encoded and written through the FILE handle.
//
// saveData: description of what to save and how. It is not modified, a local copy is edited.
// path:     destination file, opened with FileIO::WriteBinary.
//
// Returns the default-constructed result on success, or an error result when the live resource
// can't be found, the format is unsupported, a readback returns no data, the file can't be opened
// or the encoder reports failure.
ResultDetails ReplayController::SaveTexture(const TextureSave &saveData, const rdcstr &path)
{
  CHECK_REPLAY_THREAD();
  RENDERDOC_PROFILEFUNCTION();

  TextureSave sd = saveData;    // mutable copy
  ResourceId liveid = m_pDevice->GetLiveID(sd.resourceId);

  if(liveid == ResourceId())
  {
    RETURN_ERROR_RESULT(ResultCode::InvalidParameter,
                        "Couldn't get Live ID for %s getting texture data",
                        ToStr(sd.resourceId).c_str());
  }

  TextureDescription td = m_pDevice->GetTexture(liveid);

  // clamp sample/mip/slice indices. The upper bound for every index is (count - 1); the RDCMAX
  // keeps the bound at 0 if a count is ever reported as 0.
  if(td.msSamp == 1)
  {
    sd.sample.sampleIndex = 0;
    sd.sample.mapToArray = false;
  }
  else
  {
    // ~0U is the 'resolve all samples' marker and must be left untouched
    if(sd.sample.sampleIndex != ~0U)
      sd.sample.sampleIndex =
          RDCCLAMP((uint32_t)sd.sample.sampleIndex, 0U, RDCMAX(td.msSamp, 1U) - 1U);
  }

  // don't support cube cruciform for non cubemaps, or
  // cubemap arrays
  if(!td.cubemap || td.arraysize != 6 || td.msSamp != 1)
    sd.slice.cubeCruciform = false;

  // -1 means 'all mips' / 'all slices' and is preserved
  if(sd.mip != -1)
    sd.mip = RDCCLAMP(sd.mip, 0, RDCMAX((int32_t)td.mips, 1) - 1);
  if(sd.slice.sliceIndex != -1)
    sd.slice.sliceIndex =
        RDCCLAMP(sd.slice.sliceIndex, 0, RDCMAX(int32_t(td.arraysize * td.depth), 1) - 1);

  if(td.arraysize * td.depth * td.msSamp == 1)
  {
    sd.slice.sliceIndex = 0;
    sd.slice.slicesAsGrid = false;
  }

  // can't extract a channel that's not in the source texture
  if(sd.channelExtract >= 0 && (uint32_t)sd.channelExtract >= td.format.compCount)
    sd.channelExtract = -1;

  sd.slice.sliceGridWidth = RDCMAX(sd.slice.sliceGridWidth, 1);

  // store sample count so we know how many 'slices' is one real slice
  // multisampled textures cannot have mips, subresource layout is same as would be for mips:
  // [slice0 sample0], [slice0 sample1], [slice1 sample0], [slice1 sample1]
  uint32_t sampleCount = RDCMAX(td.msSamp, 1U);
  bool multisampled = td.msSamp > 1;

  if(sd.sample.mapToArray)
    sd.sample.sampleIndex = 0;

  bool resolveSamples = (sd.sample.sampleIndex == ~0U);

  if(resolveSamples)
  {
    td.msSamp = 1;
    sd.sample.mapToArray = false;
    sd.sample.sampleIndex = 0;
  }

  // remember the number of real array slices before samples are (possibly) folded into the array
  // dimension below. When resolving, td.msSamp was reset above so no folding happens and
  // td.arraysize keeps this value.
  const uint32_t realArraySize = td.arraysize;

  // treat any multisampled texture as if it were an array
  // of <sample count> dimension (on top of potential existing array
  // dimension).
  if(td.msSamp > 1)
  {
    td.arraysize *= td.msSamp;
    td.msSamp = 1;
  }

  if(sd.destType != FileType::DDS && sd.sample.mapToArray && !sd.slice.slicesAsGrid &&
     sd.slice.sliceIndex == -1)
  {
    sd.sample.mapToArray = false;
    sd.sample.sampleIndex = 0;
  }

  // only DDS supports writing multiple mips, fall back to mip 0 if 'all mips' was specified
  if(sd.destType != FileType::DDS && sd.mip == -1)
    sd.mip = 0;

  // only DDS supports writing multiple slices, fall back to slice 0 if 'all slices' was specified
  if(sd.destType != FileType::DDS && sd.slice.sliceIndex == -1 && !sd.slice.slicesAsGrid &&
     !sd.slice.cubeCruciform)
    sd.slice.sliceIndex = 0;

  // fetch source data subresources (typically only one, possibly more
  // if we're writing to DDS (so writing multiple mips/slices) or resolving
  // down a multisampled texture for writing as a single 'image' elsewhere)
  uint32_t sliceOffset = 0;
  uint32_t sliceStride = 1;
  uint32_t numSlices = td.arraysize * td.depth;

  uint32_t mipOffset = 0;
  uint32_t numMips = td.mips;

  bool singleSlice = (sd.slice.sliceIndex != -1);

  // set which slices/mips we need
  if(multisampled)
  {
    bool singleSample = !sd.sample.mapToArray;

    // multisampled images have no mips
    mipOffset = 0;
    numMips = 1;

    if(singleSlice)
    {
      if(singleSample)
      {
        // we want a specific sample in a specific real slice
        sliceOffset = sd.slice.sliceIndex * sampleCount + sd.sample.sampleIndex;
        numSlices = 1;
      }
      else
      {
        // we want all the samples (now mapped to slices) in a specific real slice
        // NOTE(review): with the [slice][sample] layout used below, the first sample of real slice
        // N lives at N * sampleCount - confirm whether sliceIndex here is meant to already be an
        // index into the sample-expanded array.
        sliceOffset = sd.slice.sliceIndex;
        numSlices = sampleCount;
      }
    }
    else
    {
      if(singleSample)
      {
        // we want one sample in every slice, so we have to set the stride to sampleCount
        // to skip every other sample (mapped to slices), starting from the sample we want
        // in the first real slice.
        // The number of iterations is the number of real slices. This is taken from the array
        // size captured before any sample folding, so it is also right when resolving (where
        // td.arraysize was never multiplied by the sample count).
        sliceOffset = sd.sample.sampleIndex;
        sliceStride = sampleCount;
        numSlices = RDCMAX(1U, realArraySize);
      }
      else
      {
        // we want all slices, all samples
        sliceOffset = 0;
        numSlices = td.arraysize;
      }
    }
  }
  else
  {
    if(singleSlice)
    {
      numSlices = 1;
      sliceOffset = sd.slice.sliceIndex;
    }
    // otherwise take all slices, as by default

    if(sd.mip != -1)
    {
      mipOffset = sd.mip;
      numMips = 1;
    }
    // otherwise take all mips, as by default
  }

  // each entry is an owning buffer allocated with new[]
  rdcarray<byte *> subdata;

  // releases every buffer currently held in subdata. Only called immediately before returning.
  auto freeSubdata = [&subdata]() {
    for(size_t i = 0; i < subdata.size(); i++)
      delete[] subdata[i];
  };

  bool downcast = false;

  // don't support slice mappings for DDS - it supports slices natively
  if(sd.destType == FileType::DDS)
  {
    sd.slice.cubeCruciform = false;
    sd.slice.slicesAsGrid = false;
  }

  // force downcast to be able to do grid mappings
  if(sd.slice.cubeCruciform || sd.slice.slicesAsGrid)
    downcast = true;

  // we don't support any file formats that handle these block compression formats
  if(td.format.type == ResourceFormatType::ETC2 || td.format.type == ResourceFormatType::EAC ||
     td.format.type == ResourceFormatType::ASTC)
    downcast = true;

  // for non-HDR always downcast if we're not already RGBA8 unorm
  if(sd.destType != FileType::DDS && sd.destType != FileType::HDR && sd.destType != FileType::EXR &&
     (td.format.compByteWidth != 1 || td.format.compCount != 4 ||
      td.format.compType != CompType::UNorm || td.format.BGRAOrder() || td.format.Special()))
    downcast = true;

  // for HDR & EXR we can convert from most regular types as well as 10.10.10.2 and 11.11.10
  if(sd.destType != FileType::DDS && td.format.Special() &&
     td.format.type != ResourceFormatType::R10G10B10A2 &&
     td.format.type != ResourceFormatType::R11G11B10)
    downcast = true;

  // if we're downcasting, pick either RGBA8 or RGBA32 to downcast to
  RemapTexture remap = RemapTexture::NoRemap;

  if(downcast)
  {
    const bool destHDR = (sd.destType == FileType::DDS || sd.destType == FileType::HDR ||
                          sd.destType == FileType::EXR);

    const bool sourceHDR =
        td.format.compByteWidth > 1 || td.format.type == ResourceFormatType::D16S8 ||
        td.format.type == ResourceFormatType::D24S8 || td.format.type == ResourceFormatType::D32S8 ||
        td.format.type == ResourceFormatType::R11G11B10 ||
        td.format.type == ResourceFormatType::R10G10B10A2 ||
        td.format.type == ResourceFormatType::R9G9B9E5 || td.format.type == ResourceFormatType::BC6 ||
        td.format.type == ResourceFormatType::BC7 || td.format.type == ResourceFormatType::YUV10 ||
        td.format.type == ResourceFormatType::YUV12 || td.format.type == ResourceFormatType::YUV16;

    // if the source and destination have more than 1 byte per component, remap to RGBA32 to avoid
    // precision loss. From here on td.format describes the remapped data, not the source texture.
    if(sourceHDR && destHDR)
    {
      remap = RemapTexture::RGBA32;
      td.format.compByteWidth = 4;
      td.format.compCount = 4;
      td.format.compType = CompType::Float;
      td.format.type = ResourceFormatType::Regular;
    }
    else
    {
      remap = RemapTexture::RGBA8;
      td.format.compByteWidth = 1;
      td.format.compCount = 4;
      td.format.compType = CompType::UNorm;
      td.format.type = ResourceFormatType::Regular;
    }
  }

  uint32_t rowPitch = 0;
  uint32_t slicePitch = 0;

  bool blockformat = false;
  int blockSize = 0;
  uint32_t bytesPerPixel = 1;

  // from here on the dimensions in td are those of the first mip that will be written
  td.width = RDCMAX(1U, td.width >> mipOffset);
  td.height = RDCMAX(1U, td.height >> mipOffset);
  td.depth = RDCMAX(1U, td.depth >> mipOffset);

  if(td.format.type == ResourceFormatType::BC1 || td.format.type == ResourceFormatType::BC2 ||
     td.format.type == ResourceFormatType::BC3 || td.format.type == ResourceFormatType::BC4 ||
     td.format.type == ResourceFormatType::BC5 || td.format.type == ResourceFormatType::BC6 ||
     td.format.type == ResourceFormatType::BC7)
  {
    blockSize =
        (td.format.type == ResourceFormatType::BC1 || td.format.type == ResourceFormatType::BC4)
            ? 8
            : 16;
    // 4x4 blocks: both dimensions round up to a whole number of blocks
    rowPitch = RDCMAX(1U, ((td.width + 3) / 4)) * blockSize;
    slicePitch = rowPitch * RDCMAX(1U, ((td.height + 3) / 4));
    blockformat = true;
  }
  else
  {
    switch(td.format.type)
    {
      case ResourceFormatType::S8:
      case ResourceFormatType::A8: bytesPerPixel = 1; break;
      case ResourceFormatType::R10G10B10A2:
      case ResourceFormatType::R9G9B9E5:
      case ResourceFormatType::R11G11B10:
      case ResourceFormatType::D24S8: bytesPerPixel = 4; break;
      case ResourceFormatType::R5G6B5:
      case ResourceFormatType::R5G5B5A1:
      case ResourceFormatType::R4G4B4A4: bytesPerPixel = 2; break;
      case ResourceFormatType::D32S8: bytesPerPixel = 8; break;
      case ResourceFormatType::D16S8:
      case ResourceFormatType::YUV8:
      case ResourceFormatType::YUV10:
      case ResourceFormatType::YUV12:
      case ResourceFormatType::YUV16:
      case ResourceFormatType::R4G4:
      {
        // nothing has been allocated into subdata yet, so there is nothing to free here
        RETURN_ERROR_RESULT(ResultCode::ImageUnsupported, "Unsupported file format %s",
                            ToStr(td.format.type).c_str());
      }
      default: bytesPerPixel = td.format.compCount * td.format.compByteWidth;
    }

    rowPitch = td.width * bytesPerPixel;
    slicePitch = rowPitch * td.height;
  }

  // loop over fetching subresources
  for(uint32_t s = 0; s < numSlices; s++)
  {
    uint32_t slice = s * sliceStride + sliceOffset;

    for(uint32_t m = 0; m < numMips; m++)
    {
      uint32_t mip = m + mipOffset;

      GetTextureDataParams params;
      params.forDiskSave = true;
      params.standardLayout = true;
      params.typeCast = sd.typeCast;
      params.resolve = resolveSamples;
      params.remap = remap;
      params.blackPoint = sd.comp.blackPoint;
      params.whitePoint = sd.comp.whitePoint;

      // split the expanded slice index back into {real slice, sample}
      Subresource sub = {mip, slice / sampleCount, slice % sampleCount};

      bytebuf data;
      m_pDevice->GetTextureData(liveid, sub, params, data);
      FatalErrorCheck();

      if(data.empty())
      {
        freeSubdata();

        RETURN_ERROR_RESULT(ResultCode::DataNotAvailable,
                            "Couldn't readback bytes for mip %u, slice %u, sample %u", sub.mip,
                            sub.slice, sub.sample);
      }

      // non-3D textures: the returned data is stored as a single subresource
      if(td.depth == 1)
      {
        byte *bytes = new byte[data.size()];
        memcpy(bytes, data.data(), data.size());
        subdata.push_back(bytes);
        continue;
      }

      uint32_t mipSlicePitch = slicePitch;

      // dimensions of this mip, relative to the first mip being written
      uint32_t w = RDCMAX(1U, td.width >> m);
      uint32_t h = RDCMAX(1U, td.height >> m);
      uint32_t d = RDCMAX(1U, td.depth >> m);

      if(blockformat)
      {
        mipSlicePitch = RDCMAX(1U, ((w + 3) / 4)) * blockSize * RDCMAX(1U, ((h + 3) / 4));
      }
      else
      {
        mipSlicePitch = w * bytesPerPixel * h;
      }

      // we don't support slice ranges, only all-or-nothing
      // we're also not dealing with multisampled slices if
      // depth > 1. So if we only want one slice out of a 3D texture
      // then make sure we get it
      if(numSlices == 1)
      {
        // the requested index was clamped against the top mip's depth; clamp again against this
        // mip's depth so the copy below stays inside the 'd' slices of this mip
        const uint32_t depthIndex = RDCCLAMP(sliceOffset, 0U, d - 1U);

        byte *depthslice = new byte[mipSlicePitch];
        byte *b = data.data() + mipSlicePitch * depthIndex;
        memcpy(depthslice, b, mipSlicePitch);
        subdata.push_back(depthslice);

        continue;
      }

      // the readback is split into d depth slices below, so advance the outer slice counter past
      // the ones this iteration consumes
      s += (d - 1);

      byte *b = data.data();

      // add each depth slice as a separate subdata
      for(uint32_t di = 0; di < d; di++)
      {
        byte *depthslice = new byte[mipSlicePitch];

        memcpy(depthslice, b, mipSlicePitch);

        subdata.push_back(depthslice);

        b += mipSlicePitch;
      }
    }
  }

  // should have been handled above, but verify incoming data is RGBA8 or RGBA32
  if(sd.slice.slicesAsGrid && (td.format.compByteWidth == 1 || td.format.compByteWidth == 4) &&
     td.format.compCount == 4 && !td.format.Special())
  {
    uint32_t sliceWidth = td.width;
    uint32_t sliceHeight = td.height;

    // number of grid rows, rounding up for a partially filled last row
    uint32_t sliceGridHeight = (td.arraysize * td.depth) / sd.slice.sliceGridWidth;
    if((td.arraysize * td.depth) % sd.slice.sliceGridWidth != 0)
      sliceGridHeight++;

    td.width *= sd.slice.sliceGridWidth;
    td.height *= sliceGridHeight;

    uint32_t pixelStride = td.format.compCount * td.format.compByteWidth;

    byte *combinedData = new byte[td.width * td.height * pixelStride];

    // cells without a slice stay zeroed
    memset(combinedData, 0, td.width * td.height * pixelStride);

    for(size_t i = 0; i < subdata.size(); i++)
    {
      uint32_t gridx = (uint32_t)i % sd.slice.sliceGridWidth;
      uint32_t gridy = (uint32_t)i / sd.slice.sliceGridWidth;

      // the combined image was sized from the texture description, not from the number of
      // subresources fetched, so never write a cell that would fall outside it
      if(gridy < sliceGridHeight)
      {
        uint32_t yoffs = gridy * sliceHeight;
        uint32_t xoffs = gridx * sliceWidth;

        for(uint32_t y = 0; y < sliceHeight; y++)
        {
          for(uint32_t x = 0; x < sliceWidth; x++)
          {
            uint32_t *srcpix = (uint32_t *)&subdata[i][(y * sliceWidth + x) * pixelStride + 0];
            uint32_t *dstpix =
                (uint32_t *)&combinedData[((y + yoffs) * td.width + x + xoffs) * pixelStride + 0];

            memcpy(dstpix, srcpix, pixelStride);
          }
        }
      }

      delete[] subdata[i];
    }

    // the combined image replaces all the individual slices
    subdata.resize(1);
    subdata[0] = combinedData;
    rowPitch = td.width * pixelStride;
  }

  // should have been handled above, but verify incoming data is RGBA8 or RGBA32 and 6 slices
  if(sd.slice.cubeCruciform && (td.format.compByteWidth == 1 || td.format.compByteWidth == 4) &&
     td.format.compCount == 4 && !td.format.Special() && subdata.size() == 6)
  {
    uint32_t sliceWidth = td.width;
    uint32_t sliceHeight = td.height;

    // the cross layout is 4 faces wide and 3 faces tall
    td.width *= 4;
    td.height *= 3;

    uint32_t pixelStride = td.format.compCount * td.format.compByteWidth;

    byte *combinedData = new byte[td.width * td.height * pixelStride];

    memset(combinedData, 0, td.width * td.height * pixelStride);

    /*
     Y X=0   1   2   3
     =     +---+
     0     |+y |
           |[2]|
       +---+---+---+---+
     1 |-x |+z |+x |-z |
       |[1]|[4]|[0]|[5]|
       +---+---+---+---+
     2     |-y |
           |[3]|
           +---+

    */

    // grid cell for each face index, matching the diagram above
    uint32_t gridx[6] = {2, 0, 1, 1, 1, 3};
    uint32_t gridy[6] = {1, 1, 0, 2, 1, 1};

    for(size_t i = 0; i < subdata.size(); i++)
    {
      uint32_t yoffs = gridy[i] * sliceHeight;
      uint32_t xoffs = gridx[i] * sliceWidth;

      for(uint32_t y = 0; y < sliceHeight; y++)
      {
        for(uint32_t x = 0; x < sliceWidth; x++)
        {
          uint32_t *srcpix = (uint32_t *)&subdata[i][(y * sliceWidth + x) * pixelStride + 0];
          uint32_t *dstpix =
              (uint32_t *)&combinedData[((y + yoffs) * td.width + x + xoffs) * pixelStride + 0];

          memcpy(dstpix, srcpix, pixelStride);
        }
      }

      delete[] subdata[i];
    }

    subdata.resize(1);
    subdata[0] = combinedData;
    rowPitch = td.width * pixelStride;
  }

  int numComps = td.format.compCount;

  // if we want a grayscale image of one channel, splat it across all channels
  // and set alpha to full. Only the first subresource is processed.
  if(sd.channelExtract >= 0 && td.format.type == ResourceFormatType::Regular &&
     (td.format.compByteWidth == 1 || td.format.compByteWidth == 4) &&
     (uint32_t)sd.channelExtract < td.format.compCount)
  {
    uint32_t pixelStride = td.format.compCount * td.format.compByteWidth;
    uint32_t compWidth = td.format.compByteWidth;
    uint32_t compCount = td.format.compCount;

    uint32_t val = 0;
    // all bits set, copied byte-wise into the alpha component
    uint32_t max = ~0U;

    for(uint32_t y = 0; y < td.height; y++)
    {
      for(uint32_t x = 0; x < td.width; x++)
      {
        memcpy(&val, &subdata[0][(y * td.width + x) * pixelStride + sd.channelExtract * compWidth],
               td.format.compByteWidth);

        // write from the last component down to the first, falling through each case
        switch(compCount)
        {
          case 4:
            memcpy(&subdata[0][(y * td.width + x) * pixelStride + 3 * compWidth], &max,
                   td.format.compByteWidth);
            DELIBERATE_FALLTHROUGH();
          case 3:
            memcpy(&subdata[0][(y * td.width + x) * pixelStride + 2 * compWidth], &val,
                   td.format.compByteWidth);
            DELIBERATE_FALLTHROUGH();
          case 2:
            memcpy(&subdata[0][(y * td.width + x) * pixelStride + 1 * compWidth], &val,
                   td.format.compByteWidth);
            DELIBERATE_FALLTHROUGH();
          case 1:
            memcpy(&subdata[0][(y * td.width + x) * pixelStride + 0 * compWidth], &val,
                   td.format.compByteWidth);
            break;
        }
      }
    }
  }

  // handle formats that don't support alpha
  if(numComps == 4 && (sd.destType == FileType::BMP || sd.destType == FileType::JPG))
  {
    byte *nonalpha = new byte[td.width * td.height * 3];

    for(uint32_t y = 0; y < td.height; y++)
    {
      for(uint32_t x = 0; x < td.width; x++)
      {
        byte r = subdata[0][(y * td.width + x) * 4 + 0];
        byte g = subdata[0][(y * td.width + x) * 4 + 1];
        byte b = subdata[0][(y * td.width + x) * 4 + 2];
        byte a = subdata[0][(y * td.width + x) * 4 + 3];

        // unless alpha is simply discarded, blend the pixel over a background colour
        if(sd.alpha != AlphaMapping::Discard)
        {
          Vec4f col = Vec4f(sd.alphaCol.x, sd.alphaCol.y, sd.alphaCol.z);
          if(sd.alpha == AlphaMapping::BlendToCheckerboard)
          {
            // 64x64 pixel checker squares
            bool lightSquare = ((x / 64) % 2) == ((y / 64) % 2);
            col = lightSquare ? RenderDoc::Inst().LightCheckerboardColor()
                              : RenderDoc::Inst().DarkCheckerboardColor();
          }

          col.x = ConvertLinearToSRGB(col.x);
          col.y = ConvertLinearToSRGB(col.y);
          col.z = ConvertLinearToSRGB(col.z);

          FloatVector pixel = FloatVector(float(r) / 255.0f, float(g) / 255.0f, float(b) / 255.0f,
                                          float(a) / 255.0f);

          pixel.x = pixel.x * pixel.w + col.x * (1.0f - pixel.w);
          pixel.y = pixel.y * pixel.w + col.y * (1.0f - pixel.w);
          pixel.z = pixel.z * pixel.w + col.z * (1.0f - pixel.w);

          r = byte(pixel.x * 255.0f);
          g = byte(pixel.y * 255.0f);
          b = byte(pixel.z * 255.0f);
        }

        nonalpha[(y * td.width + x) * 3 + 0] = r;
        nonalpha[(y * td.width + x) * 3 + 1] = g;
        nonalpha[(y * td.width + x) * 3 + 2] = b;
      }
    }

    delete[] subdata[0];

    subdata[0] = nonalpha;

    numComps = 3;
    rowPitch = td.width * 3;
  }

  // assume that (R,G,0) is better mapping than (Y,A) for 2 component data
  if(numComps == 2 && (sd.destType == FileType::BMP || sd.destType == FileType::JPG ||
                       sd.destType == FileType::PNG || sd.destType == FileType::TGA))
  {
    byte *rg0 = new byte[td.width * td.height * 3];

    for(uint32_t y = 0; y < td.height; y++)
    {
      for(uint32_t x = 0; x < td.width; x++)
      {
        byte r = subdata[0][(y * td.width + x) * 2 + 0];
        byte g = subdata[0][(y * td.width + x) * 2 + 1];

        rg0[(y * td.width + x) * 3 + 0] = r;
        rg0[(y * td.width + x) * 3 + 1] = g;
        rg0[(y * td.width + x) * 3 + 2] = 0;

        // if we're greyscaling the image, then keep the greyscale here.
        if(sd.channelExtract >= 0)
          rg0[(y * td.width + x) * 3 + 2] = r;
      }
    }

    delete[] subdata[0];

    subdata[0] = rg0;

    numComps = 3;
    rowPitch = td.width * 3;
  }

  FILE *f = FileIO::fopen(path, FileIO::WriteBinary);

  RDResult res;

  if(!f)
  {
    // grab the error text first so that releasing memory can't disturb it
    const rdcstr fileError = FileIO::ErrorString();

    // the fetched subresources are still owned here and must not leak on this early return
    freeSubdata();

    RETURN_ERROR_RESULT(ResultCode::FileIOFailed, "Couldn't write to path %s, error: %s",
                        path.c_str(), fileError.c_str());
  }
  else
  {
    if(sd.destType == FileType::DDS)
    {
      write_dds_data ddsData;

      ResourceFormat saveFmt = td.format;
      // use typeCast to inform typeless saving, otherwise it will get lost
      if(saveFmt.compType == CompType::Typeless)
        saveFmt.compType = sd.typeCast;

      ddsData.width = td.width;
      ddsData.height = td.height;
      ddsData.depth = td.depth;
      ddsData.format = saveFmt;
      ddsData.mips = numMips;
      ddsData.slices = numSlices / td.depth;
      ddsData.subresources = subdata;
      ddsData.cubemap = td.cubemap && numSlices == 6;

      if(singleSlice)
        ddsData.depth = ddsData.slices = 1;

      res = write_dds_to_file(f, ddsData);
    }
    else if(sd.destType == FileType::BMP)
    {
      int ret = stbi_write_bmp_to_func(fileWriteFunc, (void *)f, td.width, td.height, numComps,
                                       subdata[0]);

      if(ret == 0)
        SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write BMP image");
    }
    else if(sd.destType == FileType::PNG)
    {
      int ret = stbi_write_png_to_func(fileWriteFunc, (void *)f, td.width, td.height, numComps,
                                       subdata[0], rowPitch);

      if(ret == 0)
        SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write PNG image");
    }
    else if(sd.destType == FileType::TGA)
    {
      int ret = stbi_write_tga_to_func(fileWriteFunc, (void *)f, td.width, td.height, numComps,
                                       subdata[0]);

      if(ret == 0)
        SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write TGA image");
    }
    else if(sd.destType == FileType::JPG)
    {
      jpge::params p;
      p.m_quality = sd.jpegQuality;

      // size of the scratch buffer the encoder writes into
      // NOTE(review): 'len' is passed to the encoder and then used as the byte count to write, so
      // it is presumably updated to the encoded size - confirm against the jpge API.
      int len = td.width * td.height * td.format.compCount;
      // ensure buffer is at least 1024
      if(len < 1024)
        len = 1024;

      char *jpgdst = new char[len];

      bool success = jpge::compress_image_to_jpeg_file_in_memory(jpgdst, len, td.width, td.height,
                                                                 numComps, subdata[0], p);

      if(success)
        FileIO::fwrite(jpgdst, 1, len, f);
      else
        SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write JPG image");

      delete[] jpgdst;
    }
    else if(sd.destType == FileType::HDR || sd.destType == FileType::EXR)
    {
      // HDR uses one interleaved RGBA float buffer, EXR uses four planar buffers (A, B, G, R)
      float *fldata = NULL;
      float *abgr[4] = {NULL, NULL, NULL, NULL};

      if(sd.destType == FileType::HDR)
      {
        fldata = new float[td.width * td.height * 4];
      }
      else
      {
        abgr[0] = new float[td.width * td.height];
        abgr[1] = new float[td.width * td.height];
        abgr[2] = new float[td.width * td.height];
        abgr[3] = new float[td.width * td.height];
      }

      byte *srcData = subdata[0];

      // resolve a typeless format: prefer the requested cast, then fall back on component width
      ResourceFormat saveFmt = td.format;
      if(saveFmt.compType == CompType::Typeless)
        saveFmt.compType = sd.typeCast;
      if(saveFmt.compType == CompType::Typeless)
        saveFmt.compType = saveFmt.compByteWidth == 4 ? CompType::Float : CompType::UNorm;

      uint32_t pixStride = saveFmt.ElementSize();

      // 24-bit depth still has a stride of 4 bytes.
      if(saveFmt.compType == CompType::Depth && pixStride == 3)
        pixStride = 4;

      for(uint32_t y = 0; y < td.height; y++)
      {
        for(uint32_t x = 0; x < td.width; x++)
        {
          FloatVector pixel = DecodeFormattedComponents(saveFmt, srcData);
          srcData += pixStride;

          // HDR can't represent negative values
          if(sd.destType == FileType::HDR)
          {
            pixel.x = RDCMAX(pixel.x, 0.0f);
            pixel.y = RDCMAX(pixel.y, 0.0f);
            pixel.z = RDCMAX(pixel.z, 0.0f);
            pixel.w = RDCMAX(pixel.w, 0.0f);
          }

          // splat the extracted channel across RGB and force alpha to 1
          if(sd.channelExtract == 0)
          {
            pixel.y = pixel.z = pixel.x;
            pixel.w = 1.0f;
          }
          else if(sd.channelExtract == 1)
          {
            pixel.x = pixel.z = pixel.y;
            pixel.w = 1.0f;
          }
          else if(sd.channelExtract == 2)
          {
            pixel.x = pixel.y = pixel.z;
            pixel.w = 1.0f;
          }
          else if(sd.channelExtract == 3)
          {
            pixel.x = pixel.y = pixel.z = pixel.w;
            pixel.w = 1.0f;
          }

          if(fldata)
          {
            fldata[(y * td.width + x) * 4 + 0] = pixel.x;
            fldata[(y * td.width + x) * 4 + 1] = pixel.y;
            fldata[(y * td.width + x) * 4 + 2] = pixel.z;
            fldata[(y * td.width + x) * 4 + 3] = pixel.w;
          }
          else
          {
            abgr[0][(y * td.width + x)] = pixel.w;
            abgr[1][(y * td.width + x)] = pixel.z;
            abgr[2][(y * td.width + x)] = pixel.y;
            abgr[3][(y * td.width + x)] = pixel.x;
          }
        }
      }

      if(sd.destType == FileType::HDR)
      {
        int ret = stbi_write_hdr_to_func(fileWriteFunc, (void *)f, td.width, td.height, 4, fldata);

        if(ret == 0)
          SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write HDR image");
      }
      else if(sd.destType == FileType::EXR)
      {
        const char *err = NULL;

        EXRHeader exrHeader;
        InitEXRHeader(&exrHeader);

        EXRImage exrImage;
        InitEXRImage(&exrImage);

        // the planes are supplied as 32-bit float; they are stored as half unless the source
        // components were 4 bytes wide
        int pixTypes[4] = {TINYEXR_PIXELTYPE_FLOAT, TINYEXR_PIXELTYPE_FLOAT,
                           TINYEXR_PIXELTYPE_FLOAT, TINYEXR_PIXELTYPE_FLOAT};
        int reqTypes[4] = {TINYEXR_PIXELTYPE_HALF, TINYEXR_PIXELTYPE_HALF, TINYEXR_PIXELTYPE_HALF,
                           TINYEXR_PIXELTYPE_HALF};

        if(saveFmt.compByteWidth == 4)
        {
          for(size_t channel = 0; channel < 4; channel++)
          {
            reqTypes[channel] = TINYEXR_PIXELTYPE_FLOAT;
          }
        }

        // must be in this order as many viewers don't pay attention to channels and just assume
        // they are in this order
        EXRChannelInfo bgraChannels[4] = {
            {"A"},
            {"B"},
            {"G"},
            {"R"},
        };

        exrHeader.num_channels = 4;
        exrHeader.channels = bgraChannels;
        exrImage.images = (unsigned char **)abgr;
        exrImage.width = td.width;
        exrImage.height = td.height;
        exrHeader.pixel_types = pixTypes;
        exrHeader.requested_pixel_types = reqTypes;

        unsigned char *mem = NULL;

        size_t ret = SaveEXRImageToMemory(&exrImage, &exrHeader, &mem, &err);

        if(ret > 0)
          FileIO::fwrite(mem, 1, ret, f);
        else
          SET_ERROR_RESULT(res, ResultCode::InternalError, "Failed to write EXR image: %s", err);

        free(mem);
      }

      if(fldata)
      {
        delete[] fldata;
      }
      else
      {
        delete[] abgr[0];
        delete[] abgr[1];
        delete[] abgr[2];
        delete[] abgr[3];
      }
    }

    FileIO::fclose(f);
  }

  freeSubdata();

  return res;
}