bool DirectX::_StoreScanline()

in Kits/DirectXTex/DirectXTexConvert.cpp [1606:2464]


bool DirectX::_StoreScanline(
    void* pDestination,
    size_t size,
    DXGI_FORMAT format,
    const XMVECTOR* pSource,
    size_t count,
    float threshold) noexcept
{
    assert(pDestination != nullptr);
    assert(IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format));

    if (!size || !count)
        return false;

    const XMVECTOR* __restrict sPtr = pSource;
    if (!sPtr)
        return false;

    assert((reinterpret_cast<uintptr_t>(pSource) & 0xF) == 0);

    const XMVECTOR* ePtr = sPtr + count;

#ifdef _PREFAST_
    *reinterpret_cast<uint8_t*>(pDestination) = 0;
#endif

    switch (static_cast<int>(format))
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
        STORE_SCANLINE(XMFLOAT4, XMStoreFloat4)

    case DXGI_FORMAT_R32G32B32A32_UINT:
        STORE_SCANLINE(XMUINT4, XMStoreUInt4)

    case DXGI_FORMAT_R32G32B32A32_SINT:
        STORE_SCANLINE(XMINT4, XMStoreSInt4)

    case DXGI_FORMAT_R32G32B32_FLOAT:
        STORE_SCANLINE(XMFLOAT3, XMStoreFloat3)

    case DXGI_FORMAT_R32G32B32_UINT:
        STORE_SCANLINE(XMUINT3, XMStoreUInt3)

    case DXGI_FORMAT_R32G32B32_SINT:
        STORE_SCANLINE(XMINT3, XMStoreSInt3)

    case DXGI_FORMAT_R16G16B16A16_FLOAT:
        if (size >= sizeof(XMHALF4))
        {
            XMHALF4* __restrict dPtr = static_cast<XMHALF4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMHALF4) + 1); icount += sizeof(XMHALF4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = *sPtr++;
                v = XMVectorClamp(v, g_HalfMin, g_HalfMax);
                XMStoreHalf4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16G16B16A16_UNORM:
        STORE_SCANLINE(XMUSHORTN4, XMStoreUShortN4)

    case DXGI_FORMAT_R16G16B16A16_UINT:
        STORE_SCANLINE(XMUSHORT4, XMStoreUShort4)

    case DXGI_FORMAT_R16G16B16A16_SNORM:
        STORE_SCANLINE(XMSHORTN4, XMStoreShortN4)

    case DXGI_FORMAT_R16G16B16A16_SINT:
        STORE_SCANLINE(XMSHORT4, XMStoreShort4)

    case DXGI_FORMAT_R32G32_FLOAT:
        STORE_SCANLINE(XMFLOAT2, XMStoreFloat2)

    case DXGI_FORMAT_R32G32_UINT:
        STORE_SCANLINE(XMUINT2, XMStoreUInt2)

    case DXGI_FORMAT_R32G32_SINT:
        STORE_SCANLINE(XMINT2, XMStoreSInt2)

    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
        {
            const size_t psize = sizeof(float) + sizeof(uint32_t);
            if (size >= psize)
            {
                auto dPtr = static_cast<float*>(pDestination);
                for (size_t icount = 0; icount < (size - psize + 1); icount += psize)
                {
                    if (sPtr >= ePtr) break;
                    XMFLOAT4 f;
                    XMStoreFloat4(&f, *sPtr++);
                    dPtr[0] = f.x;
                    auto ps8 = reinterpret_cast<uint8_t*>(&dPtr[1]);
                    ps8[0] = static_cast<uint8_t>(std::min<float>(255.f, std::max<float>(0.f, f.y)));
                    ps8[1] = ps8[2] = ps8[3] = 0;
                    dPtr += 2;
                }
                return true;
            }
        }
        return false;

    case DXGI_FORMAT_R10G10B10A2_UNORM:
        STORE_SCANLINE(XMUDECN4, XMStoreUDecN4)

    case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
        STORE_SCANLINE(XMUDECN4, XMStoreUDecN4_XR)

    case DXGI_FORMAT_R10G10B10A2_UINT:
        STORE_SCANLINE(XMUDEC4, XMStoreUDec4)

    case DXGI_FORMAT_R11G11B10_FLOAT:
        STORE_SCANLINE(XMFLOAT3PK, XMStoreFloat3PK)

    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorAdd(*sPtr++, g_8BitBias);
                XMStoreUByteN4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8G8B8A8_UINT:
        STORE_SCANLINE(XMUBYTE4, XMStoreUByte4)

    case DXGI_FORMAT_R8G8B8A8_SNORM:
        STORE_SCANLINE(XMBYTEN4, XMStoreByteN4)

    case DXGI_FORMAT_R8G8B8A8_SINT:
        STORE_SCANLINE(XMBYTE4, XMStoreByte4)

    case DXGI_FORMAT_R16G16_FLOAT:
        if (size >= sizeof(XMHALF2))
        {
            XMHALF2* __restrict dPtr = static_cast<XMHALF2*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMHALF2) + 1); icount += sizeof(XMHALF2))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = *sPtr++;
                v = XMVectorClamp(v, g_HalfMin, g_HalfMax);
                XMStoreHalf2(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16G16_UNORM:
        STORE_SCANLINE(XMUSHORTN2, XMStoreUShortN2)

    case DXGI_FORMAT_R16G16_UINT:
        STORE_SCANLINE(XMUSHORT2, XMStoreUShort2)

    case DXGI_FORMAT_R16G16_SNORM:
        STORE_SCANLINE(XMSHORTN2, XMStoreShortN2)

    case DXGI_FORMAT_R16G16_SINT:
        STORE_SCANLINE(XMSHORT2, XMStoreShort2)

    case DXGI_FORMAT_D32_FLOAT:
    case DXGI_FORMAT_R32_FLOAT:
        if (size >= sizeof(float))
        {
            float * __restrict dPtr = static_cast<float*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
            {
                if (sPtr >= ePtr) break;
                XMStoreFloat(dPtr++, *(sPtr++));
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R32_UINT:
        if (size >= sizeof(uint32_t))
        {
            uint32_t * __restrict dPtr = static_cast<uint32_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMConvertVectorFloatToUInt(*(sPtr++), 0);
                XMStoreInt(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R32_SINT:
        if (size >= sizeof(int32_t))
        {
            uint32_t * __restrict dPtr = static_cast<uint32_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(int32_t) + 1); icount += sizeof(int32_t))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMConvertVectorFloatToInt(*(sPtr++), 0);
                XMStoreInt(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_D24_UNORM_S8_UINT:
        if (size >= sizeof(uint32_t))
        {
            static const XMVECTORF32 clamp = { { { 1.f, 255.f, 0.f, 0.f } } };
            XMVECTOR zero = XMVectorZero();
            auto dPtr = static_cast<uint32_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                if (sPtr >= ePtr) break;
                XMFLOAT4 f;
                XMStoreFloat4(&f, XMVectorClamp(*sPtr++, zero, clamp));
                *dPtr++ = (static_cast<uint32_t>(f.x * 16777215.f) & 0xFFFFFF)
                    | ((static_cast<uint32_t>(f.y) & 0xFF) << 24);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8G8_UNORM:
        STORE_SCANLINE(XMUBYTEN2, XMStoreUByteN2)

    case DXGI_FORMAT_R8G8_UINT:
        STORE_SCANLINE(XMUBYTE2, XMStoreUByte2)

    case DXGI_FORMAT_R8G8_SNORM:
        STORE_SCANLINE(XMBYTEN2, XMStoreByteN2)

    case DXGI_FORMAT_R8G8_SINT:
        STORE_SCANLINE(XMBYTE2, XMStoreByte2)

    case DXGI_FORMAT_R16_FLOAT:
        if (size >= sizeof(HALF))
        {
            HALF * __restrict dPtr = static_cast<HALF*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 65504.f), -65504.f);
                *(dPtr++) = XMConvertFloatToHalf(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_D16_UNORM:
    case DXGI_FORMAT_R16_UNORM:
        if (size >= sizeof(uint16_t))
        {
            uint16_t * __restrict dPtr = static_cast<uint16_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(uint16_t) + 1); icount += sizeof(uint16_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 1.f), 0.f);
                *(dPtr++) = static_cast<uint16_t>(v*65535.f + 0.5f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_UINT:
        if (size >= sizeof(uint16_t))
        {
            uint16_t * __restrict dPtr = static_cast<uint16_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(uint16_t) + 1); icount += sizeof(uint16_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 65535.f), 0.f);
                *(dPtr++) = static_cast<uint16_t>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_SNORM:
        if (size >= sizeof(int16_t))
        {
            int16_t * __restrict dPtr = static_cast<int16_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(int16_t) + 1); icount += sizeof(int16_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 1.f), -1.f);
                *(dPtr++) = static_cast<int16_t>(lroundf(v * 32767.f));
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_SINT:
        if (size >= sizeof(int16_t))
        {
            int16_t * __restrict dPtr = static_cast<int16_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(int16_t) + 1); icount += sizeof(int16_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 32767.f), -32767.f);
                *(dPtr++) = static_cast<int16_t>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_UNORM:
        if (size >= sizeof(uint8_t))
        {
            uint8_t * __restrict dPtr = static_cast<uint8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 1.f), 0.f);
                *(dPtr++) = static_cast<uint8_t>(v * 255.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_UINT:
        if (size >= sizeof(uint8_t))
        {
            uint8_t * __restrict dPtr = static_cast<uint8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 255.f), 0.f);
                *(dPtr++) = static_cast<uint8_t>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_SNORM:
        if (size >= sizeof(int8_t))
        {
            int8_t * __restrict dPtr = static_cast<int8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(int8_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 1.f), -1.f);
                *(dPtr++) = static_cast<int8_t>(lroundf(v * 127.f));
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_SINT:
        if (size >= sizeof(int8_t))
        {
            int8_t * __restrict dPtr = static_cast<int8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(int8_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetX(*sPtr++);
                v = std::max<float>(std::min<float>(v, 127.f), -127.f);
                *(dPtr++) = static_cast<int8_t>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_A8_UNORM:
        if (size >= sizeof(uint8_t))
        {
            uint8_t * __restrict dPtr = static_cast<uint8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (sPtr >= ePtr) break;
                float v = XMVectorGetW(*sPtr++);
                v = std::max<float>(std::min<float>(v, 1.f), 0.f);
                *(dPtr++) = static_cast<uint8_t>(v * 255.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R1_UNORM:
        if (size >= sizeof(uint8_t))
        {
            uint8_t * __restrict dPtr = static_cast<uint8_t*>(pDestination);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                uint8_t pixels = 0;
                for (size_t bcount = 8; bcount > 0; --bcount)
                {
                    if (sPtr >= ePtr) break;
                    float v = XMVectorGetX(*sPtr++);

                    // Absolute thresholding generally doesn't give good results for all images
                    // Picking the 'right' threshold automatically requires whole-image analysis

                    if (v > 0.25f)
                        pixels |= 1 << (bcount - 1);
                }
                *(dPtr++) = pixels;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
        STORE_SCANLINE(XMFLOAT3SE, StoreFloat3SE)

    case DXGI_FORMAT_R8G8_B8G8_UNORM:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v0 = *sPtr++;
                XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY(*sPtr++) : XMVectorZero();
                XMVECTOR v = XMVectorSelect(v1, v0, g_XMSelect1110);
                v = XMVectorAdd(v, g_8BitBias);
                XMStoreUByteN4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_G8R8_G8B8_UNORM:
        if (size >= sizeof(XMUBYTEN4))
        {
            static XMVECTORU32 select1101 = { { { XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1 } } };

            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>(*sPtr++);
                XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY(*sPtr++) : XMVectorZero();
                XMVECTOR v = XMVectorSelect(v1, v0, select1101);
                v = XMVectorAdd(v, g_8BitBias);
                XMStoreUByteN4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B5G6R5_UNORM:
        if (size >= sizeof(XMU565))
        {
            static const XMVECTORF32 s_Scale = { { { 31.f, 63.f, 31.f, 1.f } } };
            XMU565 * __restrict dPtr = static_cast<XMU565*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMU565) + 1); icount += sizeof(XMU565))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                XMStoreU565(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B5G5R5A1_UNORM:
        if (size >= sizeof(XMU555))
        {
            static const XMVECTORF32 s_Scale = { { { 31.f, 31.f, 31.f, 1.f } } };
            XMU555 * __restrict dPtr = static_cast<XMU555*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                XMStoreU555(dPtr, v);
                dPtr->w = (XMVectorGetW(v) > threshold) ? 1u : 0u;
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
                v = XMVectorAdd(v, g_8BitBias);
                XMStoreUByteN4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B8G8R8X8_UNORM:
    case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorPermute<2, 1, 0, 7>(*sPtr++, g_XMIdentityR3);
                v = XMVectorAdd(v, g_8BitBias);
                XMStoreUByteN4(dPtr++, v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_AYUV:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;

                XMUBYTEN4 rgba;
                XMStoreUByteN4(&rgba, *sPtr++);

                // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx

                // Y  =  0.2568R + 0.5041G + 0.1001B + 16
                // Cb = -0.1482R - 0.2910G + 0.4392B + 128
                // Cr =  0.4392R - 0.3678G - 0.0714B + 128

                int y = ((66 * rgba.x + 129 * rgba.y + 25 * rgba.z + 128) >> 8) + 16;
                int u = ((-38 * rgba.x - 74 * rgba.y + 112 * rgba.z + 128) >> 8) + 128;
                int v = ((112 * rgba.x - 94 * rgba.y - 18 * rgba.z + 128) >> 8) + 128;

                dPtr->x = static_cast<uint8_t>(std::min<int>(std::max<int>(v, 0), 255));
                dPtr->y = static_cast<uint8_t>(std::min<int>(std::max<int>(u, 0), 255));
                dPtr->z = static_cast<uint8_t>(std::min<int>(std::max<int>(y, 0), 255));
                dPtr->w = rgba.w;
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y410:
        if (size >= sizeof(XMUDECN4))
        {
            XMUDECN4 * __restrict dPtr = static_cast<XMUDECN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                if (sPtr >= ePtr) break;

                XMUDECN4 rgba;
                XMStoreUDecN4(&rgba, *sPtr++);

                // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx

                // Y  =  0.2560R + 0.5027G + 0.0998B + 64
                // Cb = -0.1478R - 0.2902G + 0.4379B + 512
                // Cr =  0.4379R - 0.3667G - 0.0712B + 512

                int64_t r = rgba.x;
                int64_t g = rgba.y;
                int64_t b = rgba.z;

                int y = static_cast<int>((16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
                int u = static_cast<int>((-9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
                int v = static_cast<int>((28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;

                dPtr->x = static_cast<uint32_t>(std::min<int>(std::max<int>(u, 0), 1023));
                dPtr->y = static_cast<uint32_t>(std::min<int>(std::max<int>(y, 0), 1023));
                dPtr->z = static_cast<uint32_t>(std::min<int>(std::max<int>(v, 0), 1023));
                dPtr->w = rgba.w;
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y416:
        if (size >= sizeof(XMUSHORTN4))
        {
            XMUSHORTN4 * __restrict dPtr = static_cast<XMUSHORTN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                if (sPtr >= ePtr) break;

                XMUSHORTN4 rgba;
                XMStoreUShortN4(&rgba, *sPtr++);

                // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx

                // Y  =  0.2558R + 0.5022G + 0.0998B + 4096
                // Cb = -0.1476R - 0.2899G + 0.4375B + 32768
                // Cr =  0.4375R - 0.3664G - 0.0711B + 32768

                int64_t r = int64_t(rgba.x);
                int64_t g = int64_t(rgba.y);
                int64_t b = int64_t(rgba.z);

                int y = static_cast<int>((16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
                int u = static_cast<int>((-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
                int v = static_cast<int>((28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;

                dPtr->x = static_cast<uint16_t>(std::min<int>(std::max<int>(u, 0), 65535));
                dPtr->y = static_cast<uint16_t>(std::min<int>(std::max<int>(y, 0), 65535));
                dPtr->z = static_cast<uint16_t>(std::min<int>(std::max<int>(v, 0), 65535));
                dPtr->w = rgba.w;
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_YUY2:
        if (size >= sizeof(XMUBYTEN4))
        {
            XMUBYTEN4 * __restrict dPtr = static_cast<XMUBYTEN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                if (sPtr >= ePtr) break;

                XMUBYTEN4 rgb1;
                XMStoreUByteN4(&rgb1, *sPtr++);

                // See AYUV
                int y0 = ((66 * rgb1.x + 129 * rgb1.y + 25 * rgb1.z + 128) >> 8) + 16;
                int u0 = ((-38 * rgb1.x - 74 * rgb1.y + 112 * rgb1.z + 128) >> 8) + 128;
                int v0 = ((112 * rgb1.x - 94 * rgb1.y - 18 * rgb1.z + 128) >> 8) + 128;

                XMUBYTEN4 rgb2;
                if (sPtr < ePtr)
                {
                    XMStoreUByteN4(&rgb2, *sPtr++);
                }
                else
                {
                    rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
                }

                int y1 = ((66 * rgb2.x + 129 * rgb2.y + 25 * rgb2.z + 128) >> 8) + 16;
                int u1 = ((-38 * rgb2.x - 74 * rgb2.y + 112 * rgb2.z + 128) >> 8) + 128;
                int v1 = ((112 * rgb2.x - 94 * rgb2.y - 18 * rgb2.z + 128) >> 8) + 128;

                dPtr->x = static_cast<uint8_t>(std::min<int>(std::max<int>(y0, 0), 255));
                dPtr->y = static_cast<uint8_t>(std::min<int>(std::max<int>((u0 + u1) >> 1, 0), 255));
                dPtr->z = static_cast<uint8_t>(std::min<int>(std::max<int>(y1, 0), 255));
                dPtr->w = static_cast<uint8_t>(std::min<int>(std::max<int>((v0 + v1) >> 1, 0), 255));
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y210:
        // Same as Y216 with least significant 6 bits set to zero
        if (size >= sizeof(XMUSHORTN4))
        {
            XMUSHORTN4 * __restrict dPtr = static_cast<XMUSHORTN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                if (sPtr >= ePtr) break;

                XMUDECN4 rgb1;
                XMStoreUDecN4(&rgb1, *sPtr++);

                // See Y410
                int64_t r = rgb1.x;
                int64_t g = rgb1.y;
                int64_t b = rgb1.z;

                int y0 = static_cast<int>((16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
                int u0 = static_cast<int>((-9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
                int v0 = static_cast<int>((28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;

                XMUDECN4 rgb2;
                if (sPtr < ePtr)
                {
                    XMStoreUDecN4(&rgb2, *sPtr++);
                }
                else
                {
                    rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
                }

                r = rgb2.x;
                g = rgb2.y;
                b = rgb2.z;

                int y1 = static_cast<int>((16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
                int u1 = static_cast<int>((-9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
                int v1 = static_cast<int>((28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;

                dPtr->x = static_cast<uint16_t>(std::min<int>(std::max<int>(y0, 0), 1023) << 6);
                dPtr->y = static_cast<uint16_t>(std::min<int>(std::max<int>((u0 + u1) >> 1, 0), 1023) << 6);
                dPtr->z = static_cast<uint16_t>(std::min<int>(std::max<int>(y1, 0), 1023) << 6);
                dPtr->w = static_cast<uint16_t>(std::min<int>(std::max<int>((v0 + v1) >> 1, 0), 1023) << 6);
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y216:
        if (size >= sizeof(XMUSHORTN4))
        {
            XMUSHORTN4 * __restrict dPtr = static_cast<XMUSHORTN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                if (sPtr >= ePtr) break;

                XMUSHORTN4 rgb1;
                XMStoreUShortN4(&rgb1, *sPtr++);

                // See Y416
                int64_t r = int64_t(rgb1.x);
                int64_t g = int64_t(rgb1.y);
                int64_t b = int64_t(rgb1.z);

                int y0 = static_cast<int>((16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
                int u0 = static_cast<int>((-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
                int v0 = static_cast<int>((28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;

                XMUSHORTN4 rgb2;
                if (sPtr < ePtr)
                {
                    XMStoreUShortN4(&rgb2, *sPtr++);
                }
                else
                {
                    rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
                }

                r = int64_t(rgb2.x);
                g = int64_t(rgb2.y);
                b = int64_t(rgb2.z);

                int y1 = static_cast<int>((16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
                int u1 = static_cast<int>((-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
                int v1 = static_cast<int>((28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;

                dPtr->x = static_cast<uint16_t>(std::min<int>(std::max<int>(y0, 0), 65535));
                dPtr->y = static_cast<uint16_t>(std::min<int>(std::max<int>((u0 + u1) >> 1, 0), 65535));
                dPtr->z = static_cast<uint16_t>(std::min<int>(std::max<int>(y1, 0), 65535));
                dPtr->w = static_cast<uint16_t>(std::min<int>(std::max<int>((v0 + v1) >> 1, 0), 65535));
                ++dPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B4G4R4A4_UNORM:
        if (size >= sizeof(XMUNIBBLE4))
        {
            static const XMVECTORF32 s_Scale = { { { 15.f, 15.f, 15.f, 15.f } } };
            XMUNIBBLE4 * __restrict dPtr = static_cast<XMUNIBBLE4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUNIBBLE4) + 1); icount += sizeof(XMUNIBBLE4))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                XMStoreUNibble4(dPtr++, v);
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT:
        // Xbox One specific 7e3 format with alpha
        if (size >= sizeof(XMUDECN4))
        {
            static const XMVECTORF32  Scale = { { { 1.0f, 1.0f, 1.0f, 3.0f } } };
            static const XMVECTORF32  C = { { { 31.875f, 31.875f, 31.875f, 3.f } } };

            XMUDECN4 * __restrict dPtr = static_cast<XMUDECN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                if (sPtr >= ePtr) break;

                XMVECTOR V = XMVectorMultiply(*sPtr++, Scale);
                V = XMVectorClamp(V, g_XMZero, C);

                XMFLOAT4A tmp;
                XMStoreFloat4A(&tmp, V);

                dPtr->x = FloatTo7e3(tmp.x);
                dPtr->y = FloatTo7e3(tmp.y);
                dPtr->z = FloatTo7e3(tmp.z);
                dPtr->w = static_cast<uint32_t>(tmp.w);
                ++dPtr;
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT:
        // Xbox One specific 6e4 format with alpha
        if (size >= sizeof(XMUDECN4))
        {
            static const XMVECTORF32  Scale = { { { 1.0f, 1.0f, 1.0f, 3.0f } } };
            static const XMVECTORF32  C = { { { 508.f, 508.f, 508.f, 3.f } } };

            XMUDECN4 * __restrict dPtr = static_cast<XMUDECN4*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                if (sPtr >= ePtr) break;

                XMVECTOR V = XMVectorMultiply(*sPtr++, Scale);
                V = XMVectorClamp(V, g_XMZero, C);

                XMFLOAT4A tmp;
                XMStoreFloat4A(&tmp, V);

                dPtr->x = FloatTo6e4(tmp.x);
                dPtr->y = FloatTo6e4(tmp.y);
                dPtr->z = FloatTo6e4(tmp.z);
                dPtr->w = static_cast<uint32_t>(tmp.w);
                ++dPtr;
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM:
        // Xbox One specific format
        STORE_SCANLINE(XMXDECN4, XMStoreXDecN4)

    case XBOX_DXGI_FORMAT_R4G4_UNORM:
        // Xbox One specific format
        if (size >= sizeof(uint8_t))
        {
            static const XMVECTORF32 s_Scale = { { { 15.f, 15.f, 0.f, 0.f } } };
            uint8_t * __restrict dPtr = static_cast<uint8_t*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorMultiply(*sPtr++, s_Scale);

                XMUNIBBLE4 nibble;
                XMStoreUNibble4(&nibble, v);
                *dPtr = static_cast<uint8_t>(nibble.v);
                ++dPtr;
            }
            return true;
        }
        return false;

        // We don't support the planar or palettized formats

    default:
        return false;
    }
}