HRESULT Generate3DMipsCubicFilter()

in DirectXTex/DirectXTexMipmaps.cpp [2150:2525]


    // Fills mip levels 1..levels-1 of a volume mip chain. Each level is produced from the level
    // directly above it using cubic interpolation along X and Y, and also along Z for as long as
    // the source level has more than one slice.
    //
    //  depth    - slice count of the top level; zero is rejected
    //  levels   - number of mip levels in the chain (asserted to be greater than 1)
    //  filter   - filter flags; the WRAP_*/MIRROR_* bits are handed to _CreateCubicFilter and the
    //             whole value is handed to the scanline load/store helpers
    //  mipChain - chain whose top level is expected to already hold the base images (see _Setup3DMips)
    //
    // Returns S_OK on success, E_INVALIDARG for a zero depth or a chain without images,
    // E_OUTOFMEMORY when scratch memory cannot be allocated, E_POINTER when a needed image is
    // missing from the chain, and E_FAIL when a scanline cannot be loaded or stored.
    HRESULT Generate3DMipsCubicFilter(size_t depth, size_t levels, TEX_FILTER_FLAGS filter, const ScratchImage& mipChain) noexcept
    {
        if (!depth || !mipChain.GetImages())
            return E_INVALIDARG;

        // The top level of mipChain must already contain the base images (see _Setup3DMips)

        assert(levels > 1);

        size_t width = mipChain.GetMetadata().width;
        size_t height = mipChain.GetMetadata().height;

        // Scratch memory: 1 output scanline + (4 vertical taps x 4 slices) source scanlines = 17
        auto scratch = make_AlignedArrayXMVECTOR(uint64_t(width) * 17);
        if (!scratch)
            return E_OUTOFMEMORY;

        // A single allocation stores the X, Y and Z filter tables back to back
        std::unique_ptr<CubicFilter[]> filterTable(new (std::nothrow) CubicFilter[width + height + depth]);
        if (!filterTable)
            return E_OUTOFMEMORY;

        CubicFilter* const cfX = filterTable.get();
        CubicFilter* const cfY = cfX + width;
        CubicFilter* const cfZ = cfY + height;

        // Scanline 0 of the scratch memory receives the filtered output row
        XMVECTOR* const target = scratch.get();

        // Row sets for the four vertical taps; index j selects the slice (only j == 0 is used in 2D)
        XMVECTOR* urow[4];
        XMVECTOR* vrow[4];
        XMVECTOR* srow[4];
        XMVECTOR* trow[4];

        for (size_t j = 0; j < 4; ++j)
        {
            XMVECTOR* const group = target + width * (1 + 4 * j);
            urow[j] = group;
            vrow[j] = group + width;
            srow[j] = group + width * 2;
            trow[j] = group + width * 3;
        }

        // The row sets in vertical tap order, so the cache logic below can address them by index
        XMVECTOR** const tapRows[4] = { urow, vrow, srow, trow };

        // Makes the cached row sets match the four source rows requested by 'taps'.
        // For each tap in order: nothing to do when the slot already holds the row; otherwise the
        // row is taken over from the first later slot that holds it (that slot becomes empty), or
        // loaded from every plane when no later slot has it. 'cachedRow' tracks which source row
        // each slot holds, with size_t(-1) marking an empty slot. Returns false when a load fails.
        auto syncRowCache = [&](const CubicFilter& taps, const Image* const* planes, size_t planeCount, size_t* cachedRow) -> bool
        {
            const size_t wanted[4] = { size_t(taps.u0), size_t(taps.u1), size_t(taps.u2), size_t(taps.u3) };

            for (size_t tap = 0; tap < 4; ++tap)
            {
                const size_t row = wanted[tap];
                if (row == cachedRow[tap])
                    continue;

                size_t donor = tap + 1;
                while (donor < 4 && cachedRow[donor] != row)
                    ++donor;

                if (donor < 4)
                {
                    // Reuse rows that are already loaded by exchanging the buffer pointers
                    cachedRow[tap] = cachedRow[donor];
                    cachedRow[donor] = size_t(-1);

                    for (size_t p = 0; p < planeCount; ++p)
                        std::swap(tapRows[tap][p], tapRows[donor][p]);
                }
                else
                {
                    cachedRow[tap] = row;

                    for (size_t p = 0; p < planeCount; ++p)
                    {
                        const Image* const plane = planes[p];
                        if (!_LoadScanlineLinear(tapRows[tap][p], width, plane->pixels + (plane->rowPitch * row), plane->rowPitch, plane->format, filter))
                            return false;
                    }
                }
            }

            return true;
        };

        // Build every remaining level from the one above it
        for (size_t level = 1; level < levels; ++level)
        {
            const size_t dstWidth = (width > 1) ? (width >> 1) : 1;
            _CreateCubicFilter(width, dstWidth, (filter & TEX_FILTER_WRAP_U) != 0, (filter & TEX_FILTER_MIRROR_U) != 0, cfX);

            const size_t dstHeight = (height > 1) ? (height >> 1) : 1;
            _CreateCubicFilter(height, dstHeight, (filter & TEX_FILTER_WRAP_V) != 0, (filter & TEX_FILTER_MIRROR_V) != 0, cfY);

#ifdef _DEBUG
            // Poison the source rows so stale data stands out in debug builds
            for (size_t j = 0; j < 4; ++j)
            {
                memset(urow[j], 0xCD, sizeof(XMVECTOR)*width);
                memset(vrow[j], 0xDD, sizeof(XMVECTOR)*width);
                memset(srow[j], 0xED, sizeof(XMVECTOR)*width);
                memset(trow[j], 0xFD, sizeof(XMVECTOR)*width);
            }
#endif

            if (depth > 1)
            {
                // 3D cubic filter
                const size_t dstDepth = depth >> 1;
                _CreateCubicFilter(depth, dstDepth, (filter & TEX_FILTER_WRAP_W) != 0, (filter & TEX_FILTER_MIRROR_W) != 0, cfZ);

                for (size_t slice = 0; slice < dstDepth; ++slice)
                {
                    const auto& toZ = cfZ[slice];

                    // The four source slices that contribute to this destination slice
                    const Image* const planes[4] =
                    {
                        mipChain.GetImage(level - 1, 0, toZ.u0),
                        mipChain.GetImage(level - 1, 0, toZ.u1),
                        mipChain.GetImage(level - 1, 0, toZ.u2),
                        mipChain.GetImage(level - 1, 0, toZ.u3),
                    };
                    if (!planes[0] || !planes[1] || !planes[2] || !planes[3])
                        return E_POINTER;

                    // The cache starts empty for every destination slice
                    size_t cachedRow[4] = { size_t(-1), size_t(-1), size_t(-1), size_t(-1) };

                    const Image* const dest = mipChain.GetImage(level, 0, slice);
                    if (!dest)
                        return E_POINTER;

                    uint8_t* dstRow = dest->pixels;

                    for (size_t y = 0; y < dstHeight; ++y)
                    {
                        const auto& toY = cfY[y];

                        if (!syncRowCache(toY, planes, 4, cachedRow))
                            return E_FAIL;

                        for (size_t x = 0; x < dstWidth; ++x)
                        {
                            const auto& toX = cfX[x];

                            // One X/Y-filtered value per contributing slice
                            XMVECTOR D[4];

                            for (size_t j = 0; j < 4; ++j)
                            {
                                XMVECTOR C0, C1, C2, C3;
                                CUBIC_INTERPOLATE(C0, toX.x, urow[j][toX.u0], urow[j][toX.u1], urow[j][toX.u2], urow[j][toX.u3])
                                CUBIC_INTERPOLATE(C1, toX.x, vrow[j][toX.u0], vrow[j][toX.u1], vrow[j][toX.u2], vrow[j][toX.u3])
                                CUBIC_INTERPOLATE(C2, toX.x, srow[j][toX.u0], srow[j][toX.u1], srow[j][toX.u2], srow[j][toX.u3])
                                CUBIC_INTERPOLATE(C3, toX.x, trow[j][toX.u0], trow[j][toX.u1], trow[j][toX.u2], trow[j][toX.u3])

                                CUBIC_INTERPOLATE(D[j], toY.x, C0, C1, C2, C3)
                            }

                            // Blend the four slices along Z
                            CUBIC_INTERPOLATE(target[x], toZ.x, D[0], D[1], D[2], D[3])
                        }

                        if (!_StoreScanlineLinear(dstRow, dest->rowPitch, dest->format, target, dstWidth, filter))
                            return E_FAIL;
                        dstRow += dest->rowPitch;
                    }
                }
            }
            else
            {
                // 2D cubic filter
                const Image* src = mipChain.GetImage(level - 1, 0, 0);
                const Image* const dest = mipChain.GetImage(level, 0, 0);

                if (!src || !dest)
                    return E_POINTER;

                uint8_t* dstRow = dest->pixels;

                size_t cachedRow[4] = { size_t(-1), size_t(-1), size_t(-1), size_t(-1) };

                for (size_t y = 0; y < dstHeight; ++y)
                {
                    const auto& toY = cfY[y];

                    // Only a single plane takes part, so just index 0 of each row set is touched
                    if (!syncRowCache(toY, &src, 1, cachedRow))
                        return E_FAIL;

                    for (size_t x = 0; x < dstWidth; ++x)
                    {
                        const auto& toX = cfX[x];

                        XMVECTOR C0, C1, C2, C3;
                        CUBIC_INTERPOLATE(C0, toX.x, urow[0][toX.u0], urow[0][toX.u1], urow[0][toX.u2], urow[0][toX.u3])
                        CUBIC_INTERPOLATE(C1, toX.x, vrow[0][toX.u0], vrow[0][toX.u1], vrow[0][toX.u2], vrow[0][toX.u3])
                        CUBIC_INTERPOLATE(C2, toX.x, srow[0][toX.u0], srow[0][toX.u1], srow[0][toX.u2], srow[0][toX.u3])
                        CUBIC_INTERPOLATE(C3, toX.x, trow[0][toX.u0], trow[0][toX.u1], trow[0][toX.u2], trow[0][toX.u3])

                        CUBIC_INTERPOLATE(target[x], toY.x, C0, C1, C2, C3)
                    }

                    if (!_StoreScanlineLinear(dstRow, dest->rowPitch, dest->format, target, dstWidth, filter))
                        return E_FAIL;
                    dstRow += dest->rowPitch;
                }
            }

            // The level just written becomes the source of the next iteration
            width = dstWidth;
            height = dstHeight;

            if (depth > 1)
                depth >>= 1;
        }

        return S_OK;
    }