in DirectXTex/DirectXTexMipmaps.cpp [2150:2525]
//-------------------------------------------------------------------------------------
// Generates the mip levels 1..levels-1 of a volume (3D) texture using a cubic filter.
//
// Each level is built from the level directly above it (level - 1) and written in
// place into the images returned by mipChain.GetImage(level, 0, slice).
//
// depth    - number of slices at the top level; 0 is rejected.
// levels   - total number of mip levels; asserted to be > 1.
// filter   - TEX_FILTER_WRAP_* / TEX_FILTER_MIRROR_* bits are tested per axis and handed
//            to _CreateCubicFilter; the whole value is also forwarded to the scanline
//            load/store helpers.
// mipChain - container holding both the source and destination images.
//
// Returns S_OK on success, E_INVALIDARG (zero depth or no images), E_OUTOFMEMORY
// (temporary allocation failed), E_POINTER (a required image was not found), or
// E_FAIL (a scanline load/store helper reported failure).
//-------------------------------------------------------------------------------------
HRESULT Generate3DMipsCubicFilter(size_t depth, size_t levels, TEX_FILTER_FLAGS filter, const ScratchImage& mipChain) noexcept
{
if (!depth || !mipChain.GetImages())
return E_INVALIDARG;
// This assumes that the base images are already placed into the mipChain at the top level... (see _Setup3DMips)
assert(levels > 1);
// Running dimensions of the current source level; halved at the bottom of the level loop.
size_t width = mipChain.GetMetadata().width;
size_t height = mipChain.GetMetadata().height;
// Allocate temporary space (17 scanlines, plus X/Y/Z filters)
// 17 = 1 output scanline (target) + 4 row slots (u/v/s/t) x 4 source slices.
auto scanline = make_AlignedArrayXMVECTOR(uint64_t(width) * 17);
if (!scanline)
return E_OUTOFMEMORY;
// One allocation holds all three filter tables: width entries for X, height for Y, depth for Z.
// They are sized for the top level, which is the largest source size used.
std::unique_ptr<CubicFilter[]> cf(new (std::nothrow) CubicFilter[width + height + depth]);
if (!cf)
return E_OUTOFMEMORY;
CubicFilter* cfX = cf.get();
CubicFilter* cfY = cf.get() + width;
CubicFilter* cfZ = cf.get() + width + height;
// target is the first scanline of the temporary buffer and receives the filtered output row.
XMVECTOR* target = scanline.get();
// Row caches: urow/vrow/srow/trow hold the four source rows named by a Y filter entry
// (u0/u1/u2/u3 respectively). The array index [0..3] selects the source slice
// (srca/srcb/srcc/srcd) in the 3D path; the 2D path only uses index [0].
XMVECTOR* urow[4];
XMVECTOR* vrow[4];
XMVECTOR* srow[4];
XMVECTOR* trow[4];
// Carve the remaining 16 scanlines out of the buffer, interleaved as u,v,s,t per slice index.
XMVECTOR *ptr = scanline.get() + width;
for (size_t j = 0; j < 4; ++j)
{
urow[j] = ptr; ptr += width;
vrow[j] = ptr; ptr += width;
srow[j] = ptr; ptr += width;
trow[j] = ptr; ptr += width;
}
// Resize base image to each target mip level
// (the source of each iteration is level - 1, i.e. the previously generated level)
for (size_t level = 1; level < levels; ++level)
{
// Destination width/height are half the source, but never below 1.
// The X and Y filter tables are rebuilt for the current source/destination sizes.
size_t nwidth = (width > 1) ? (width >> 1) : 1;
_CreateCubicFilter(width, nwidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX);
size_t nheight = (height > 1) ? (height >> 1) : 1;
_CreateCubicFilter(height, nheight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY);
#ifdef _DEBUG
// Debug builds only: overwrite every row slot with a distinct byte pattern per slot kind
// (presumably so that reads of stale/unloaded rows are easy to recognize).
for (size_t j = 0; j < 4; ++j)
{
memset(urow[j], 0xCD, sizeof(XMVECTOR)*width);
memset(vrow[j], 0xDD, sizeof(XMVECTOR)*width);
memset(srow[j], 0xED, sizeof(XMVECTOR)*width);
memset(trow[j], 0xFD, sizeof(XMVECTOR)*width);
}
#endif
if (depth > 1)
{
// 3D cubic filter
// depth > 1 here, so ndepth is at least 1.
size_t ndepth = depth >> 1;
_CreateCubicFilter(depth, ndepth, (filter & TEX_FILTER_WRAP_W) != 0, (filter & TEX_FILTER_MIRROR_W) != 0, cfZ);
for (size_t slice = 0; slice < ndepth; ++slice)
{
// toZ.u0..u3 are the four source slice indices feeding this destination slice;
// toZ.x is the value handed to CUBIC_INTERPOLATE for the final blend along Z.
auto& toZ = cfZ[slice];
const Image* srca = mipChain.GetImage(level - 1, 0, toZ.u0);
const Image* srcb = mipChain.GetImage(level - 1, 0, toZ.u1);
const Image* srcc = mipChain.GetImage(level - 1, 0, toZ.u2);
const Image* srcd = mipChain.GetImage(level - 1, 0, toZ.u3);
if (!srca || !srcb || !srcc || !srcd)
return E_POINTER;
// Cache tags: the source row index currently held in urow/vrow/srow/trow.
// size_t(-1) marks a slot as holding no valid row. The tags are reset for every
// destination slice because the source slices change.
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
size_t u2 = size_t(-1);
size_t u3 = size_t(-1);
const Image* dest = mipChain.GetImage(level, 0, slice);
if (!dest)
return E_POINTER;
uint8_t* pDest = dest->pixels;
for (size_t y = 0; y < nheight; ++y)
{
// toY.u0..u3 are the four source rows needed for destination row y.
auto& toY = cfY[y];
// Scanline 1
// If the u slot does not already hold row toY.u0: load it from all four source
// slices when no other slot has it; otherwise swap buffers with the slot that
// does and invalidate that slot's tag (it now holds the old u contents).
if (toY.u0 != u0)
{
if (toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(urow[0], width, srca->pixels + (srca->rowPitch * u0), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(urow[1], width, srcb->pixels + (srcb->rowPitch * u0), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(urow[2], width, srcc->pixels + (srcc->rowPitch * u0), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(urow[3], width, srcd->pixels + (srcd->rowPitch * u0), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else if (toY.u0 == u1)
{
u0 = u1;
u1 = size_t(-1);
std::swap(urow[0], vrow[0]);
std::swap(urow[1], vrow[1]);
std::swap(urow[2], vrow[2]);
std::swap(urow[3], vrow[3]);
}
else if (toY.u0 == u2)
{
u0 = u2;
u2 = size_t(-1);
std::swap(urow[0], srow[0]);
std::swap(urow[1], srow[1]);
std::swap(urow[2], srow[2]);
std::swap(urow[3], srow[3]);
}
else if (toY.u0 == u3)
{
u0 = u3;
u3 = size_t(-1);
std::swap(urow[0], trow[0]);
std::swap(urow[1], trow[1]);
std::swap(urow[2], trow[2]);
std::swap(urow[3], trow[3]);
}
}
// Scanline 2
// Same scheme for the v slot; only the later slots (s, t) are checked for reuse.
if (toY.u1 != u1)
{
if (toY.u1 != u2 && toY.u1 != u3)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(vrow[0], width, srca->pixels + (srca->rowPitch * u1), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(vrow[1], width, srcb->pixels + (srcb->rowPitch * u1), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(vrow[2], width, srcc->pixels + (srcc->rowPitch * u1), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(vrow[3], width, srcd->pixels + (srcd->rowPitch * u1), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else if (toY.u1 == u2)
{
u1 = u2;
u2 = size_t(-1);
std::swap(vrow[0], srow[0]);
std::swap(vrow[1], srow[1]);
std::swap(vrow[2], srow[2]);
std::swap(vrow[3], srow[3]);
}
else if (toY.u1 == u3)
{
u1 = u3;
u3 = size_t(-1);
std::swap(vrow[0], trow[0]);
std::swap(vrow[1], trow[1]);
std::swap(vrow[2], trow[2]);
std::swap(vrow[3], trow[3]);
}
}
// Scanline 3
// Same scheme for the s slot; only the t slot can supply a cached row.
if (toY.u2 != u2)
{
if (toY.u2 != u3)
{
u2 = toY.u2;
if (!_LoadScanlineLinear(srow[0], width, srca->pixels + (srca->rowPitch * u2), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(srow[1], width, srcb->pixels + (srcb->rowPitch * u2), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(srow[2], width, srcc->pixels + (srcc->rowPitch * u2), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(srow[3], width, srcd->pixels + (srcd->rowPitch * u2), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
else
{
u2 = u3;
u3 = size_t(-1);
std::swap(srow[0], trow[0]);
std::swap(srow[1], trow[1]);
std::swap(srow[2], trow[2]);
std::swap(srow[3], trow[3]);
}
}
// Scanline 4
// The t slot has no later slot to borrow from, so a mismatch always reloads.
if (toY.u3 != u3)
{
u3 = toY.u3;
if (!_LoadScanlineLinear(trow[0], width, srca->pixels + (srca->rowPitch * u3), srca->rowPitch, srca->format, filter)
|| !_LoadScanlineLinear(trow[1], width, srcb->pixels + (srcb->rowPitch * u3), srcb->rowPitch, srcb->format, filter)
|| !_LoadScanlineLinear(trow[2], width, srcc->pixels + (srcc->rowPitch * u3), srcc->rowPitch, srcc->format, filter)
|| !_LoadScanlineLinear(trow[3], width, srcd->pixels + (srcd->rowPitch * u3), srcd->rowPitch, srcd->format, filter))
return E_FAIL;
}
// Produce one destination row. For each output pixel: interpolate along X within
// each of the four cached rows, then along Y (toY.x) to get one value per source
// slice (D[j]), then along Z (toZ.x) across the four slices.
// CUBIC_INTERPOLATE is a macro defined elsewhere; its first argument is the output.
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = cfX[x];
XMVECTOR D[4];
for (size_t j = 0; j < 4; ++j)
{
XMVECTOR C0, C1, C2, C3;
CUBIC_INTERPOLATE(C0, toX.x, urow[j][toX.u0], urow[j][toX.u1], urow[j][toX.u2], urow[j][toX.u3])
CUBIC_INTERPOLATE(C1, toX.x, vrow[j][toX.u0], vrow[j][toX.u1], vrow[j][toX.u2], vrow[j][toX.u3])
CUBIC_INTERPOLATE(C2, toX.x, srow[j][toX.u0], srow[j][toX.u1], srow[j][toX.u2], srow[j][toX.u3])
CUBIC_INTERPOLATE(C3, toX.x, trow[j][toX.u0], trow[j][toX.u1], trow[j][toX.u2], trow[j][toX.u3])
CUBIC_INTERPOLATE(D[j], toY.x, C0, C1, C2, C3)
}
CUBIC_INTERPOLATE(target[x], toZ.x, D[0], D[1], D[2], D[3])
}
// Write the nwidth filtered pixels to the destination row, then advance one row.
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
}
else
{
// 2D cubic filter
// depth is 1: a single source slice and a single destination slice, so only
// element [0] of each row slot is used and there is no interpolation along Z.
const Image* src = mipChain.GetImage(level - 1, 0, 0);
const Image* dest = mipChain.GetImage(level, 0, 0);
if (!src || !dest)
return E_POINTER;
const uint8_t* pSrc = src->pixels;
uint8_t* pDest = dest->pixels;
size_t rowPitch = src->rowPitch;
// Cache tags for urow[0]/vrow[0]/srow[0]/trow[0]; size_t(-1) means "no valid row".
size_t u0 = size_t(-1);
size_t u1 = size_t(-1);
size_t u2 = size_t(-1);
size_t u3 = size_t(-1);
for (size_t y = 0; y < nheight; ++y)
{
auto& toY = cfY[y];
// Scanline 1
// Reuse a row already cached in a later slot by swapping buffers (and invalidating
// that slot's tag); otherwise load the row from the source image.
if (toY.u0 != u0)
{
if (toY.u0 != u1 && toY.u0 != u2 && toY.u0 != u3)
{
u0 = toY.u0;
if (!_LoadScanlineLinear(urow[0], width, pSrc + (rowPitch * u0), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u0 == u1)
{
u0 = u1;
u1 = size_t(-1);
std::swap(urow[0], vrow[0]);
}
else if (toY.u0 == u2)
{
u0 = u2;
u2 = size_t(-1);
std::swap(urow[0], srow[0]);
}
else if (toY.u0 == u3)
{
u0 = u3;
u3 = size_t(-1);
std::swap(urow[0], trow[0]);
}
}
// Scanline 2
if (toY.u1 != u1)
{
if (toY.u1 != u2 && toY.u1 != u3)
{
u1 = toY.u1;
if (!_LoadScanlineLinear(vrow[0], width, pSrc + (rowPitch * u1), rowPitch, src->format, filter))
return E_FAIL;
}
else if (toY.u1 == u2)
{
u1 = u2;
u2 = size_t(-1);
std::swap(vrow[0], srow[0]);
}
else if (toY.u1 == u3)
{
u1 = u3;
u3 = size_t(-1);
std::swap(vrow[0], trow[0]);
}
}
// Scanline 3
if (toY.u2 != u2)
{
if (toY.u2 != u3)
{
u2 = toY.u2;
if (!_LoadScanlineLinear(srow[0], width, pSrc + (rowPitch * u2), rowPitch, src->format, filter))
return E_FAIL;
}
else
{
u2 = u3;
u3 = size_t(-1);
std::swap(srow[0], trow[0]);
}
}
// Scanline 4
// Last slot: nothing later to borrow from, so a mismatch always reloads.
if (toY.u3 != u3)
{
u3 = toY.u3;
if (!_LoadScanlineLinear(trow[0], width, pSrc + (rowPitch * u3), rowPitch, src->format, filter))
return E_FAIL;
}
// Interpolate along X within each of the four cached rows, then along Y (toY.x).
for (size_t x = 0; x < nwidth; ++x)
{
auto& toX = cfX[x];
XMVECTOR C0, C1, C2, C3;
CUBIC_INTERPOLATE(C0, toX.x, urow[0][toX.u0], urow[0][toX.u1], urow[0][toX.u2], urow[0][toX.u3])
CUBIC_INTERPOLATE(C1, toX.x, vrow[0][toX.u0], vrow[0][toX.u1], vrow[0][toX.u2], vrow[0][toX.u3])
CUBIC_INTERPOLATE(C2, toX.x, srow[0][toX.u0], srow[0][toX.u1], srow[0][toX.u2], srow[0][toX.u3])
CUBIC_INTERPOLATE(C3, toX.x, trow[0][toX.u0], trow[0][toX.u1], trow[0][toX.u2], trow[0][toX.u3])
CUBIC_INTERPOLATE(target[x], toY.x, C0, C1, C2, C3)
}
// Write the nwidth filtered pixels to the destination row, then advance one row.
if (!_StoreScanlineLinear(pDest, dest->rowPitch, dest->format, target, nwidth, filter))
return E_FAIL;
pDest += dest->rowPitch;
}
}
// The level just written becomes the source of the next iteration: halve each
// dimension, leaving any dimension that has already reached 1 unchanged.
if (height > 1)
height >>= 1;
if (width > 1)
width >>= 1;
if (depth > 1)
depth >>= 1;
}
return S_OK;
}