_Use_decl_annotations_ bool DirectX::_LoadScanline()

in Kits/DirectXTex/DirectXTexConvert.cpp [758:1582]


// Loads one scanline of packed pixels in 'format' and expands each pixel into a
// four-component float vector.
//
//  pDestination - 16-byte aligned output array (alignment is asserted) with room for 'count' vectors
//  count        - maximum number of vectors to write; source pixels beyond that are ignored
//  pSource      - packed source pixel data
//  size         - size of the source data in bytes; a trailing partial element is ignored
//  format       - DXGI format of the source data; typeless, compressed, planar and
//                 palettized formats are rejected by assertion
//
// Returns false when pDestination or pSource is null, when the format is not handled
// here, or (for the cases written out below) when 'size' is smaller than a single
// source element. DXGI_FORMAT_R32G32B32A32_FLOAT always returns true.
//
// Channels that are absent from the source format are filled in as 0 for x/y/z and 1
// for w (DXGI_FORMAT_A8_UNORM is the exception: it produces (0, 0, 0, a)).
//
// Every hand-written loop below uses the condition 'icount < (size - elementSize + 1)'
// so only complete source elements are consumed; the enclosing 'size >= elementSize'
// test keeps that unsigned subtraction from wrapping around.
//
// NOTE(review): the LOAD_SCANLINE / LOAD_SCANLINE2 / LOAD_SCANLINE3 macros are defined
// outside this function; presumably each expands to an equivalent bounded load loop that
// also returns from the function (no case below falls through) - confirm against the
// macro definitions.
_Use_decl_annotations_ bool DirectX::_LoadScanline(
    XMVECTOR* pDestination,
    size_t count,
    const void* pSource,
    size_t size,
    DXGI_FORMAT format) noexcept
{
    assert(pDestination && count > 0 && ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) == 0));
    assert(pSource && size > 0);
    assert(IsValid(format) && !IsTypeless(format, false) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format));

    XMVECTOR* __restrict dPtr = pDestination;

    // The asserts vanish in release builds, so fail cleanly instead of dereferencing null.
    if (!dPtr || !pSource)
        return false;

    // One past the last writable destination vector; every store is guarded against it.
    const XMVECTOR* ePtr = pDestination + count;

    // Switch on int because the Xbox-specific format values are not DXGI_FORMAT enumerators.
    switch (static_cast<int>(format))
    {
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    {
        // Source layout already matches XMVECTOR: copy raw bytes, limited to the destination capacity.
        size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
        memcpy(dPtr, pSource, msize);
    }
    return true;

    case DXGI_FORMAT_R32G32B32A32_UINT:
        LOAD_SCANLINE(XMUINT4, XMLoadUInt4)

    case DXGI_FORMAT_R32G32B32A32_SINT:
        LOAD_SCANLINE(XMINT4, XMLoadSInt4)

    case DXGI_FORMAT_R32G32B32_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3, XMLoadFloat3, g_XMIdentityR3)

    case DXGI_FORMAT_R32G32B32_UINT:
        LOAD_SCANLINE3(XMUINT3, XMLoadUInt3, g_XMIdentityR3)

    case DXGI_FORMAT_R32G32B32_SINT:
        LOAD_SCANLINE3(XMINT3, XMLoadSInt3, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16B16A16_FLOAT:
        LOAD_SCANLINE(XMHALF4, XMLoadHalf4)

    case DXGI_FORMAT_R16G16B16A16_UNORM:
        LOAD_SCANLINE(XMUSHORTN4, XMLoadUShortN4)

    case DXGI_FORMAT_R16G16B16A16_UINT:
        LOAD_SCANLINE(XMUSHORT4, XMLoadUShort4)

    case DXGI_FORMAT_R16G16B16A16_SNORM:
        LOAD_SCANLINE(XMSHORTN4, XMLoadShortN4)

    case DXGI_FORMAT_R16G16B16A16_SINT:
        LOAD_SCANLINE(XMSHORT4, XMLoadShort4)

    case DXGI_FORMAT_R32G32_FLOAT:
        LOAD_SCANLINE2(XMFLOAT2, XMLoadFloat2, g_XMIdentityR3)

    case DXGI_FORMAT_R32G32_UINT:
        LOAD_SCANLINE2(XMUINT2, XMLoadUInt2, g_XMIdentityR3)

    case DXGI_FORMAT_R32G32_SINT:
        LOAD_SCANLINE2(XMINT2, XMLoadSInt2, g_XMIdentityR3)

    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
    {
        // Each element is a 32-bit float depth followed by a 32-bit word whose first byte is the stencil.
        const size_t psize = sizeof(float) + sizeof(uint32_t);
        if (size >= psize)
        {
            auto sPtr = static_cast<const float*>(pSource);
            for (size_t icount = 0; icount < (size - psize + 1); icount += psize)
            {
                auto ps8 = reinterpret_cast<const uint8_t*>(&sPtr[1]);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(sPtr[0], static_cast<float>(*ps8), 0.f, 1.f);
                sPtr += 2;
            }
            return true;
        }
    }
    return false;

    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
    {
        // Same 8-byte layout as above, but only the float is read.
        const size_t psize = sizeof(float) + sizeof(uint32_t);
        if (size >= psize)
        {
            auto sPtr = static_cast<const float*>(pSource);
            for (size_t icount = 0; icount < (size - psize + 1); icount += psize)
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(sPtr[0], 0.f /* typeless component assumed zero */, 0.f, 1.f);
                sPtr += 2;
            }
            return true;
        }
    }
    return false;

    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
    {
        // Same 8-byte layout as above, but only the 8-bit value in the second word is read.
        const size_t psize = sizeof(float) + sizeof(uint32_t);
        if (size >= psize)
        {
            auto sPtr = static_cast<const float*>(pSource);
            for (size_t icount = 0; icount < (size - psize + 1); icount += psize)
            {
                auto pg8 = reinterpret_cast<const uint8_t*>(&sPtr[1]);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(0.f /* typeless component assumed zero */, static_cast<float>(*pg8), 0.f, 1.f);
                sPtr += 2;
            }
            return true;
        }
    }
    return false;

    case DXGI_FORMAT_R10G10B10A2_UNORM:
        LOAD_SCANLINE(XMUDECN4, XMLoadUDecN4)

    case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
        LOAD_SCANLINE(XMUDECN4, XMLoadUDecN4_XR)

    case DXGI_FORMAT_R10G10B10A2_UINT:
        LOAD_SCANLINE(XMUDEC4, XMLoadUDec4)

    case DXGI_FORMAT_R11G11B10_FLOAT:
        LOAD_SCANLINE3(XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3)

    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
        LOAD_SCANLINE(XMUBYTEN4, XMLoadUByteN4)

    case DXGI_FORMAT_R8G8B8A8_UINT:
        LOAD_SCANLINE(XMUBYTE4, XMLoadUByte4)

    case DXGI_FORMAT_R8G8B8A8_SNORM:
        LOAD_SCANLINE(XMBYTEN4, XMLoadByteN4)

    case DXGI_FORMAT_R8G8B8A8_SINT:
        LOAD_SCANLINE(XMBYTE4, XMLoadByte4)

    case DXGI_FORMAT_R16G16_FLOAT:
        LOAD_SCANLINE2(XMHALF2, XMLoadHalf2, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_UNORM:
        LOAD_SCANLINE2(XMUSHORTN2, XMLoadUShortN2, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_UINT:
        LOAD_SCANLINE2(XMUSHORT2, XMLoadUShort2, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_SNORM:
        LOAD_SCANLINE2(XMSHORTN2, XMLoadShortN2, g_XMIdentityR3)

    case DXGI_FORMAT_R16G16_SINT:
        LOAD_SCANLINE2(XMSHORT2, XMLoadShort2, g_XMIdentityR3)

    case DXGI_FORMAT_D32_FLOAT:
    case DXGI_FORMAT_R32_FLOAT:
        if (size >= sizeof(float))
        {
            const float* __restrict sPtr = static_cast<const float*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(float) + 1); icount += sizeof(float))
            {
                XMVECTOR v = XMLoadFloat(sPtr++);
                if (dPtr >= ePtr) break;
                // Keep x from the loaded value, take y/z/w from g_XMIdentityR3.
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R32_UINT:
        if (size >= sizeof(uint32_t))
        {
            const uint32_t* __restrict sPtr = static_cast<const uint32_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                XMVECTOR v = XMLoadInt(sPtr++);
                v = XMConvertVectorUIntToFloat(v, 0);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R32_SINT:
        if (size >= sizeof(int32_t))
        {
            const int32_t * __restrict sPtr = static_cast<const int32_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(int32_t) + 1); icount += sizeof(int32_t))
            {
                XMVECTOR v = XMLoadInt(reinterpret_cast<const uint32_t*>(sPtr++));
                v = XMConvertVectorIntToFloat(v, 0);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1000);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_D24_UNORM_S8_UINT:
        if (size >= sizeof(uint32_t))
        {
            auto sPtr = static_cast<const uint32_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                // Low 24 bits: depth normalized by 2^24 - 1; high 8 bits: stencil as an integer value.
                auto d = static_cast<float>(*sPtr & 0xFFFFFF) / 16777215.f;
                auto s = static_cast<float>((*sPtr & 0xFF000000) >> 24);
                ++sPtr;
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(d, s, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
        if (size >= sizeof(uint32_t))
        {
            auto sPtr = static_cast<const uint32_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                auto r = static_cast<float>(*sPtr & 0xFFFFFF) / 16777215.f;
                ++sPtr;
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(r, 0.f /* typeless component assumed zero */, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
        if (size >= sizeof(uint32_t))
        {
            auto sPtr = static_cast<const uint32_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint32_t) + 1); icount += sizeof(uint32_t))
            {
                auto g = static_cast<float>((*sPtr & 0xFF000000) >> 24);
                ++sPtr;
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(0.f /* typeless component assumed zero */, g, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8G8_UNORM:
        LOAD_SCANLINE2(XMUBYTEN2, XMLoadUByteN2, g_XMIdentityR3)

    case DXGI_FORMAT_R8G8_UINT:
        LOAD_SCANLINE2(XMUBYTE2, XMLoadUByte2, g_XMIdentityR3)

    case DXGI_FORMAT_R8G8_SNORM:
        LOAD_SCANLINE2(XMBYTEN2, XMLoadByteN2, g_XMIdentityR3)

    case DXGI_FORMAT_R8G8_SINT:
        LOAD_SCANLINE2(XMBYTE2, XMLoadByte2, g_XMIdentityR3)

    case DXGI_FORMAT_R16_FLOAT:
        if (size >= sizeof(HALF))
        {
            const HALF * __restrict sPtr = static_cast<const HALF*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(HALF) + 1); icount += sizeof(HALF))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_D16_UNORM:
    case DXGI_FORMAT_R16_UNORM:
        if (size >= sizeof(uint16_t))
        {
            const uint16_t* __restrict sPtr = static_cast<const uint16_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint16_t) + 1); icount += sizeof(uint16_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++) / 65535.f, 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_UINT:
        if (size >= sizeof(uint16_t))
        {
            const uint16_t * __restrict sPtr = static_cast<const uint16_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint16_t) + 1); icount += sizeof(uint16_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_SNORM:
        if (size >= sizeof(int16_t))
        {
            const int16_t * __restrict sPtr = static_cast<const int16_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(int16_t) + 1); icount += sizeof(int16_t))
            {
                if (dPtr >= ePtr) break;
                // SNORM treats -32768 the same as -32767 (both are -1.0), so clamp the
                // quotient; this matches the XMLoadShortN* loaders used for the
                // multi-channel 16-bit SNORM formats above.
                *(dPtr++) = XMVectorSet(std::max<float>(static_cast<float>(*sPtr++) / 32767.f, -1.f), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R16_SINT:
        if (size >= sizeof(int16_t))
        {
            const int16_t * __restrict sPtr = static_cast<const int16_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(int16_t) + 1); icount += sizeof(int16_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_UNORM:
        if (size >= sizeof(uint8_t))
        {
            const uint8_t * __restrict sPtr = static_cast<const uint8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++) / 255.f, 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_UINT:
        if (size >= sizeof(uint8_t))
        {
            const uint8_t * __restrict sPtr = static_cast<const uint8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_SNORM:
        if (size >= sizeof(int8_t))
        {
            const int8_t * __restrict sPtr = static_cast<const int8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(int8_t))
            {
                if (dPtr >= ePtr) break;
                // SNORM treats -128 the same as -127 (both are -1.0), so clamp the
                // quotient; this matches the XMLoadByteN* loaders used for the
                // multi-channel 8-bit SNORM formats above.
                *(dPtr++) = XMVectorSet(std::max<float>(static_cast<float>(*sPtr++) / 127.f, -1.f), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R8_SINT:
        if (size >= sizeof(int8_t))
        {
            const int8_t * __restrict sPtr = static_cast<const int8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(int8_t))
            {
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(static_cast<float>(*sPtr++), 0.f, 0.f, 1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_A8_UNORM:
        if (size >= sizeof(uint8_t))
        {
            const uint8_t * __restrict sPtr = static_cast<const uint8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                if (dPtr >= ePtr) break;
                // Alpha-only: color channels are zero and the source value lands in w.
                *(dPtr++) = XMVectorSet(0.f, 0.f, 0.f, static_cast<float>(*sPtr++) / 255.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R1_UNORM:
        if (size >= sizeof(uint8_t))
        {
            const uint8_t * __restrict sPtr = static_cast<const uint8_t*>(pSource);
            for (size_t icount = 0; icount < size; icount += sizeof(uint8_t))
            {
                // Eight 1-bit pixels per byte, emitted from the most significant bit down.
                for (size_t bcount = 8; bcount > 0; --bcount)
                {
                    if (dPtr >= ePtr) break;
                    *(dPtr++) = XMVectorSet((((*sPtr >> (bcount - 1)) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f);
                }

                ++sPtr;
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
        LOAD_SCANLINE3(XMFLOAT3SE, XMLoadFloat3SE, g_XMIdentityR3)

    case DXGI_FORMAT_R8G8_B8G8_UNORM:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                // One 4-byte element yields two pixels: both share x and z, the first
                // uses y and the second uses w as its second channel.
                XMVECTOR v = XMLoadUByteN4(sPtr++);
                XMVECTOR v1 = XMVectorSwizzle<0, 3, 2, 1>(v);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1110);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v1, g_XMSelect1110);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_G8R8_G8B8_UNORM:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                // One 4-byte element yields two pixels: both share y and w (as output x
                // and z), the first uses x and the second uses z as its second channel.
                XMVECTOR v = XMLoadUByteN4(sPtr++);
                XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>(v);
                XMVECTOR v1 = XMVectorSwizzle<1, 2, 3, 0>(v);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v0, g_XMSelect1110);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v1, g_XMSelect1110);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B5G6R5_UNORM:
        if (size >= sizeof(XMU565))
        {
            static const XMVECTORF32 s_Scale = { { { 1.f / 31.f, 1.f / 63.f, 1.f / 31.f, 1.f } } };
            const XMU565 * __restrict sPtr = static_cast<const XMU565*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMU565) + 1); icount += sizeof(XMU565))
            {
                XMVECTOR v = XMLoadU565(sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                // Swap x and z so the BGR-ordered source ends up in RGB order.
                v = XMVectorSwizzle<2, 1, 0, 3>(v);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1110);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B5G5R5A1_UNORM:
        if (size >= sizeof(XMU555))
        {
            static const XMVECTORF32 s_Scale = { { { 1.f / 31.f, 1.f / 31.f, 1.f / 31.f, 1.f } } };
            const XMU555 * __restrict sPtr = static_cast<const XMU555*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555))
            {
                XMVECTOR v = XMLoadU555(sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                XMVECTOR v = XMLoadUByteN4(sPtr++);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>(v);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B8G8R8X8_UNORM:
    case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                XMVECTOR v = XMLoadUByteN4(sPtr++);
                v = XMVectorSwizzle<2, 1, 0, 3>(v);
                if (dPtr >= ePtr) break;
                // The X channel carries no data; w comes from g_XMIdentityR3 instead.
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1110);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_AYUV:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                int v = int(sPtr->x) - 128;
                int u = int(sPtr->y) - 128;
                int y = int(sPtr->z) - 16;
                unsigned int a = sPtr->w;
                ++sPtr;

                // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx

                // Y'  = Y - 16
                // Cb' = Cb - 128
                // Cr' = Cr - 128

                // R = 1.1644Y' + 1.5960Cr'
                // G = 1.1644Y' - 0.3917Cb' - 0.8128Cr'
                // B = 1.1644Y' + 2.0172Cb'

                // Fixed-point form of the equations above: coefficients scaled by 256, +128 rounds.
                int r = (298 * y + 409 * v + 128) >> 8;
                int g = (298 * y - 100 * u - 208 * v + 128) >> 8;
                int b = (298 * y + 516 * u + 128) >> 8;

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(g, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(b, 0), 255)) / 255.f,
                    float(a) / 255.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y410:
        if (size >= sizeof(XMUDECN4))
        {
            const XMUDECN4 * __restrict sPtr = static_cast<const XMUDECN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                int64_t u = int(sPtr->x) - 512;
                int64_t y = int(sPtr->y) - 64;
                int64_t v = int(sPtr->z) - 512;
                unsigned int a = sPtr->w;
                ++sPtr;

                // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx

                // Y'  = Y - 64
                // Cb' = Cb - 512
                // Cr' = Cr - 512

                // R = 1.1678Y' + 1.6007Cr'
                // G = 1.1678Y' - 0.3929Cb' - 0.8152Cr'
                // B = 1.1678Y' + 2.0232Cb'

                // Fixed-point form of the equations above: coefficients scaled by 65536, +32768 rounds.
                auto r = static_cast<int>((76533 * y + 104905 * v + 32768) >> 16);
                auto g = static_cast<int>((76533 * y - 25747 * u - 53425 * v + 32768) >> 16);
                auto b = static_cast<int>((76533 * y + 132590 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(g, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(b, 0), 1023)) / 1023.f,
                    float(a) / 3.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y416:
        if (size >= sizeof(XMUSHORTN4))
        {
            const XMUSHORTN4 * __restrict sPtr = static_cast<const XMUSHORTN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                int64_t u = int64_t(sPtr->x) - 32768;
                int64_t y = int64_t(sPtr->y) - 4096;
                int64_t v = int64_t(sPtr->z) - 32768;
                auto a = static_cast<int>(sPtr->w);
                ++sPtr;

                // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx

                // Y'  = Y - 4096
                // Cb' = Cb - 32768
                // Cr' = Cr - 32768

                // R = 1.1689Y' + 1.6023Cr'
                // G = 1.1689Y' - 0.3933Cb' - 0.8160Cr'
                // B = 1.1689Y'+ 2.0251Cb'

                int r = static_cast<int>((76607 * y + 105006 * v + 32768) >> 16);
                int g = static_cast<int>((76607 * y - 25772 * u - 53477 * v + 32768) >> 16);
                int b = static_cast<int>((76607 * y + 132718 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(g, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(b, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(a, 0), 65535)) / 65535.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_YUY2:
        if (size >= sizeof(XMUBYTEN4))
        {
            const XMUBYTEN4 * __restrict sPtr = static_cast<const XMUBYTEN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUBYTEN4) + 1); icount += sizeof(XMUBYTEN4))
            {
                // One 4-byte element holds two luma samples (y0, y1) sharing a single u/v pair,
                // so it produces two output pixels.
                int y0 = int(sPtr->x) - 16;
                int u = int(sPtr->y) - 128;
                int y1 = int(sPtr->z) - 16;
                int v = int(sPtr->w) - 128;
                ++sPtr;

                // See AYUV
                int r = (298 * y0 + 409 * v + 128) >> 8;
                int g = (298 * y0 - 100 * u - 208 * v + 128) >> 8;
                int b = (298 * y0 + 516 * u + 128) >> 8;

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(g, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(b, 0), 255)) / 255.f,
                    1.f);

                r = (298 * y1 + 409 * v + 128) >> 8;
                g = (298 * y1 - 100 * u - 208 * v + 128) >> 8;
                b = (298 * y1 + 516 * u + 128) >> 8;

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(g, 0), 255)) / 255.f,
                    float(std::min<int>(std::max<int>(b, 0), 255)) / 255.f,
                    1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y210:
        // Same as Y216 with least significant 6 bits set to zero
        if (size >= sizeof(XMUSHORTN4))
        {
            const XMUSHORTN4 * __restrict sPtr = static_cast<const XMUSHORTN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                // Shift out the unused low 6 bits to get 10-bit samples; two pixels per element.
                int64_t y0 = int64_t(sPtr->x >> 6) - 64;
                int64_t u = int64_t(sPtr->y >> 6) - 512;
                int64_t y1 = int64_t(sPtr->z >> 6) - 64;
                int64_t v = int64_t(sPtr->w >> 6) - 512;
                ++sPtr;

                // See Y410
                auto r = static_cast<int>((76533 * y0 + 104905 * v + 32768) >> 16);
                auto g = static_cast<int>((76533 * y0 - 25747 * u - 53425 * v + 32768) >> 16);
                auto b = static_cast<int>((76533 * y0 + 132590 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(g, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(b, 0), 1023)) / 1023.f,
                    1.f);

                r = static_cast<int>((76533 * y1 + 104905 * v + 32768) >> 16);
                g = static_cast<int>((76533 * y1 - 25747 * u - 53425 * v + 32768) >> 16);
                b = static_cast<int>((76533 * y1 + 132590 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(g, 0), 1023)) / 1023.f,
                    float(std::min<int>(std::max<int>(b, 0), 1023)) / 1023.f,
                    1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_Y216:
        if (size >= sizeof(XMUSHORTN4))
        {
            const XMUSHORTN4 * __restrict sPtr = static_cast<const XMUSHORTN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUSHORTN4) + 1); icount += sizeof(XMUSHORTN4))
            {
                // Two luma samples sharing one u/v pair: two output pixels per element.
                int64_t y0 = int64_t(sPtr->x) - 4096;
                int64_t u = int64_t(sPtr->y) - 32768;
                int64_t y1 = int64_t(sPtr->z) - 4096;
                int64_t v = int64_t(sPtr->w) - 32768;
                ++sPtr;

                // See Y416
                auto r = static_cast<int>((76607 * y0 + 105006 * v + 32768) >> 16);
                auto g = static_cast<int>((76607 * y0 - 25772 * u - 53477 * v + 32768) >> 16);
                auto b = static_cast<int>((76607 * y0 + 132718 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(g, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(b, 0), 65535)) / 65535.f,
                    1.f);

                r = static_cast<int>((76607 * y1 + 105006 * v + 32768) >> 16);
                g = static_cast<int>((76607 * y1 - 25772 * u - 53477 * v + 32768) >> 16);
                b = static_cast<int>((76607 * y1 + 132718 * u + 32768) >> 16);

                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSet(float(std::min<int>(std::max<int>(r, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(g, 0), 65535)) / 65535.f,
                    float(std::min<int>(std::max<int>(b, 0), 65535)) / 65535.f,
                    1.f);
            }
            return true;
        }
        return false;

    case DXGI_FORMAT_B4G4R4A4_UNORM:
        if (size >= sizeof(XMUNIBBLE4))
        {
            static const XMVECTORF32 s_Scale = { { { 1.f / 15.f, 1.f / 15.f, 1.f / 15.f, 1.f / 15.f } } };
            const XMUNIBBLE4 * __restrict sPtr = static_cast<const XMUNIBBLE4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUNIBBLE4) + 1); icount += sizeof(XMUNIBBLE4))
            {
                XMVECTOR v = XMLoadUNibble4(sPtr++);
                v = XMVectorMultiply(v, s_Scale);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>(v);
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT:
        // Xbox One specific 7e3 format
        if (size >= sizeof(XMUDECN4))
        {
            const XMUDECN4 * __restrict sPtr = static_cast<const XMUDECN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                if (dPtr >= ePtr) break;

                // Alpha is the top 2 bits of the packed value, normalized by its maximum of 3.
                XMVECTORF32 vResult = { { {
                    FloatFrom7e3(sPtr->x),
                    FloatFrom7e3(sPtr->y),
                    FloatFrom7e3(sPtr->z),
                    static_cast<float>(sPtr->v >> 30) / 3.0f
                } } };

                ++sPtr;

                *(dPtr++) = vResult.v;
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT:
        // Xbox One specific 6e4 format
        if (size >= sizeof(XMUDECN4))
        {
            const XMUDECN4 * __restrict sPtr = static_cast<const XMUDECN4*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(XMUDECN4) + 1); icount += sizeof(XMUDECN4))
            {
                if (dPtr >= ePtr) break;

                // Alpha is the top 2 bits of the packed value, normalized by its maximum of 3.
                XMVECTORF32 vResult = { { {
                    FloatFrom6e4(sPtr->x),
                    FloatFrom6e4(sPtr->y),
                    FloatFrom6e4(sPtr->z),
                    static_cast<float>(sPtr->v >> 30) / 3.0f
                } } };

                ++sPtr;

                *(dPtr++) = vResult.v;
            }
            return true;
        }
        return false;

    case XBOX_DXGI_FORMAT_R10G10B10_SNORM_A2_UNORM:
        // Xbox One specific format
        LOAD_SCANLINE(XMXDECN4, XMLoadXDecN4)

    case XBOX_DXGI_FORMAT_R4G4_UNORM:
        // Xbox One specific format
        if (size >= sizeof(uint8_t))
        {
            static const XMVECTORF32 s_Scale = { { { 1.f / 15.f, 1.f / 15.f, 0.f, 0.f } } };
            const uint8_t * __restrict sPtr = static_cast<const uint8_t*>(pSource);
            for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t))
            {
                // Widen the byte into a 16-bit nibble pack so the 4-nibble loader can be
                // reused; only the x and y results are kept by the select below.
                XMUNIBBLE4 nibble;
                nibble.v = static_cast<uint16_t>(*sPtr++);
                XMVECTOR v = XMLoadUNibble4(&nibble);
                v = XMVectorMultiply(v, s_Scale);
                if (dPtr >= ePtr) break;
                *(dPtr++) = XMVectorSelect(g_XMIdentityR3, v, g_XMSelect1100);
            }
            return true;
        }
        return false;

        // We don't support the planar or palettized formats

    default:
        return false;
    }
}