void EncodeBC1()

in DirectXTex/BC.cpp [370:685]


    //-------------------------------------------------------------------------------------
    // Compresses one block of NUM_PIXELS_PER_BLOCK pixels into a single BC1 block.
    //
    //   pBC        Receives the two 5:6:5 endpoints (rgb[0], rgb[1]) and the packed
    //              2-bit per-pixel selectors (bitmap).
    //   pColor     Source pixels for the block.
    //   bColorKey  When true, pixels whose alpha is below 'threshold' are treated as
    //              color-keyed: a fully keyed block is written as 0x0000 / 0xffff with
    //              every selector set to 3; a partially keyed block is encoded with
    //              three color steps and selector 3 for each keyed pixel.
    //   threshold  Alpha cut-off used for the color-key test.
    //   flags      BC_FLAGS_DITHER_RGB enables error diffusion; BC_FLAGS_UNIFORM skips
    //              the g_Luminance channel weighting. Also forwarded to OptimizeRGB.
    //-------------------------------------------------------------------------------------
    void EncodeBC1(
        _Out_ D3DX_BC1 *pBC,
        _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pColor,
        bool bColorKey,
        float threshold,
        uint32_t flags) noexcept
    {
        assert(pBC && pColor);
        static_assert(sizeof(D3DX_BC1) == 8, "D3DX_BC1 should be 8 bytes");

        const bool bDither = (flags & BC_FLAGS_DITHER_RGB) != 0;
        const bool bUniform = (flags & BC_FLAGS_UNIFORM) != 0;

        // Pick the number of color steps: four by default, three when some (but not
        // all) pixels fall below the alpha threshold.
        uint32_t uSteps = 4u;

        if (bColorKey)
        {
            size_t nKeyed = 0;

            for (size_t n = 0; n < NUM_PIXELS_PER_BLOCK; ++n)
            {
                if (pColor[n].a < threshold)
                    ++nKeyed;
            }

            if (nKeyed == NUM_PIXELS_PER_BLOCK)
            {
                // Every pixel is keyed out: emit the fixed all-keyed block.
                pBC->rgb[0] = 0x0000;
                pBC->rgb[1] = 0xffff;
                pBC->bitmap = 0xffffffff;
                return;
            }

            if (nKeyed > 0)
                uSteps = 3u;
        }

        HDRColorA Quantized[NUM_PIXELS_PER_BLOCK];
        HDRColorA Error[NUM_PIXELS_PER_BLOCK];

        // Floyd-Steinberg style error distribution. The block is addressed as rows of
        // four entries: 7/16 goes to the next entry in the row, and 3/16, 5/16, 1/16
        // go to the three entries around the same column in the following row.
        // Neighbors that would fall outside the row or past the last row are skipped.
        const auto SpreadError = [&Error](size_t idx, const HDRColorA &residual)
        {
            const size_t column = idx & 3;
            const bool bHasNext = (3 != column);

            if (bHasNext)
            {
                assert(idx < 15);
                _Analysis_assume_(idx < 15);
                Error[idx + 1].r += residual.r * (7.0f / 16.0f);
                Error[idx + 1].g += residual.g * (7.0f / 16.0f);
                Error[idx + 1].b += residual.b * (7.0f / 16.0f);
            }

            if (idx >= 12)
                return; // no following row to receive error

            if (0 != column)
            {
                Error[idx + 3].r += residual.r * (3.0f / 16.0f);
                Error[idx + 3].g += residual.g * (3.0f / 16.0f);
                Error[idx + 3].b += residual.b * (3.0f / 16.0f);
            }

            Error[idx + 4].r += residual.r * (5.0f / 16.0f);
            Error[idx + 4].g += residual.g * (5.0f / 16.0f);
            Error[idx + 4].b += residual.b * (5.0f / 16.0f);

            if (bHasNext)
            {
                assert(idx < 11);
                _Analysis_assume_(idx < 11);
                Error[idx + 5].r += residual.r * (1.0f / 16.0f);
                Error[idx + 5].g += residual.g * (1.0f / 16.0f);
                Error[idx + 5].b += residual.b * (1.0f / 16.0f);
            }
        };

        // Pass 1: snap every pixel to the R5G6B5 grid (optionally with error
        // diffusion) so that colors are more likely to land exactly on the quantized
        // axis endpoints.
        if (bDither)
            memset(Error, 0x00, sizeof(Error));

        for (size_t n = 0; n < NUM_PIXELS_PER_BLOCK; ++n)
        {
            HDRColorA src;
            src.r = pColor[n].r;
            src.g = pColor[n].g;
            src.b = pColor[n].b;
            src.a = 1.0f;

            if (bDither)
            {
                src.r += Error[n].r;
                src.g += Error[n].g;
                src.b += Error[n].b;
            }

            HDRColorA &snapped = Quantized[n];
            snapped.r = static_cast<float>(static_cast<int32_t>(src.r * 31.0f + 0.5f)) * (1.0f / 31.0f);
            snapped.g = static_cast<float>(static_cast<int32_t>(src.g * 63.0f + 0.5f)) * (1.0f / 63.0f);
            snapped.b = static_cast<float>(static_cast<int32_t>(src.b * 31.0f + 0.5f)) * (1.0f / 31.0f);

#ifdef COLOR_WEIGHTS
            snapped.a = pColor[n].a;
#else
            snapped.a = 1.0f;
#endif // COLOR_WEIGHTS

            if (bDither)
            {
                // The residual is measured before the luminance weighting below.
                HDRColorA residual;
                residual.r = snapped.a * (src.r - snapped.r);
                residual.g = snapped.a * (src.g - snapped.g);
                residual.b = snapped.a * (src.b - snapped.b);
                residual.a = 0.0f;

                SpreadError(n, residual);
            }

            if (!bUniform)
            {
                snapped.r *= g_Luminance.r;
                snapped.g *= g_Luminance.g;
                snapped.b *= g_Luminance.b;
            }
        }

        // Find the two endpoints of the color axis, then convert them to 5:6:5.
        // 'Weighted*' live in the (possibly luminance-weighted) fitting space,
        // 'Plain*' hold the same endpoints with the weighting removed.
        HDRColorA WeightedA, WeightedB, PlainA, PlainB;

        OptimizeRGB(&WeightedA, &WeightedB, Quantized, uSteps, flags);

        if (!bUniform)
        {
            PlainA.r = WeightedA.r * g_LuminanceInv.r;
            PlainA.g = WeightedA.g * g_LuminanceInv.g;
            PlainA.b = WeightedA.b * g_LuminanceInv.b;
            PlainA.a = WeightedA.a;

            PlainB.r = WeightedB.r * g_LuminanceInv.r;
            PlainB.g = WeightedB.g * g_LuminanceInv.g;
            PlainB.b = WeightedB.b * g_LuminanceInv.b;
            PlainB.a = WeightedB.a;
        }
        else
        {
            PlainA = WeightedA;
            PlainB = WeightedB;
        }

        const uint16_t wEndA = Encode565(&PlainA);
        const uint16_t wEndB = Encode565(&PlainB);

        if ((4 == uSteps) && (wEndA == wEndB))
        {
            // Both endpoints collapsed to the same 5:6:5 value: every pixel uses
            // selector 0.
            pBC->rgb[0] = wEndA;
            pBC->rgb[1] = wEndB;
            pBC->bitmap = 0x00000000;
            return;
        }

        // Round-trip through 5:6:5 so the selectors are chosen against the values
        // that were actually stored.
        Decode565(&PlainA, wEndA);
        Decode565(&PlainB, wEndB);

        if (!bUniform)
        {
            WeightedA.r = PlainA.r * g_Luminance.r;
            WeightedA.g = PlainA.g * g_Luminance.g;
            WeightedA.b = PlainA.b * g_Luminance.b;

            WeightedB.r = PlainB.r * g_Luminance.r;
            WeightedB.g = PlainB.g * g_Luminance.g;
            WeightedB.b = PlainB.b * g_Luminance.b;
        }
        else
        {
            WeightedA = PlainA;
            WeightedB = PlainB;
        }

        // Order the endpoints: with three steps rgb[0] <= rgb[1] is stored, with
        // four steps rgb[0] > rgb[1] is stored. Palette[0]/[1] follow the same order.
        HDRColorA Palette[4];

        if ((3 == uSteps) != (wEndA <= wEndB))
        {
            pBC->rgb[0] = wEndB;
            pBC->rgb[1] = wEndA;

            Palette[0] = WeightedB;
            Palette[1] = WeightedA;
        }
        else
        {
            pBC->rgb[0] = wEndA;
            pBC->rgb[1] = wEndB;

            Palette[0] = WeightedA;
            Palette[1] = WeightedB;
        }

        // Maps a position along the axis (0 .. uSteps-1) to the selector value:
        // the endpoints are selectors 0 and 1, interpolated colors follow.
        static const size_t s_Remap3[] = { 0, 2, 1 };
        static const size_t s_Remap4[] = { 0, 2, 3, 1 };
        const size_t *pRemap;

        if (3 != uSteps)
        {
            pRemap = s_Remap4;

            HDRColorALerp(&Palette[2], &Palette[0], &Palette[1], 1.0f / 3.0f);
            HDRColorALerp(&Palette[3], &Palette[0], &Palette[1], 2.0f / 3.0f);
        }
        else
        {
            pRemap = s_Remap3;

            HDRColorALerp(&Palette[2], &Palette[0], &Palette[1], 0.5f);
        }

        // Axis from Palette[0] to Palette[1], scaled so that projecting a color onto
        // it yields a value in step units (0 at Palette[0], uSteps-1 at Palette[1]).
        HDRColorA Axis;
        Axis.r = Palette[1].r - Palette[0].r;
        Axis.g = Palette[1].g - Palette[0].g;
        Axis.b = Palette[1].b - Palette[0].b;
        Axis.a = 0.0f;

        const float fLastStep = static_cast<float>(uSteps - 1);

        float fScale = 0.0f;
        if (wEndA != wEndB)
            fScale = fLastStep / (Axis.r * Axis.r + Axis.g * Axis.g + Axis.b * Axis.b);

        Axis.r *= fScale;
        Axis.g *= fScale;
        Axis.b *= fScale;

        // Pass 2: choose a selector per pixel. Each 2-bit selector is shifted in at
        // the top of the word, so pixel 0 ends up in the low bits once every pixel
        // has been processed.
        uint32_t dwSelectors = 0;

        if (bDither)
            memset(Error, 0x00, sizeof(Error));

        for (size_t n = 0; n < NUM_PIXELS_PER_BLOCK; ++n)
        {
            if ((3 == uSteps) && (pColor[n].a < threshold))
            {
                // Color-keyed pixel: selector 3, no error is diffused from it.
                dwSelectors = (3u << 30) | (dwSelectors >> 2);
                continue;
            }

            HDRColorA target;
            if (!bUniform)
            {
                target.r = pColor[n].r * g_Luminance.r;
                target.g = pColor[n].g * g_Luminance.g;
                target.b = pColor[n].b * g_Luminance.b;
            }
            else
            {
                target.r = pColor[n].r;
                target.g = pColor[n].g;
                target.b = pColor[n].b;
            }
            target.a = 1.0f;

            if (bDither)
            {
                target.r += Error[n].r;
                target.g += Error[n].g;
                target.b += Error[n].b;
            }

            const float fPos = (target.r - Palette[0].r) * Axis.r + (target.g - Palette[0].g) * Axis.g + (target.b - Palette[0].b) * Axis.b;

            uint32_t uSelector;
            if (fPos <= 0.0f)
            {
                uSelector = 0;
            }
            else if (fPos >= fLastStep)
            {
                uSelector = 1;
            }
            else
            {
                uSelector = static_cast<uint32_t>(pRemap[static_cast<uint32_t>(fPos + 0.5f)]);
            }

            dwSelectors = (uSelector << 30) | (dwSelectors >> 2);

            if (bDither)
            {
                HDRColorA residual;
                residual.r = Quantized[n].a * (target.r - Palette[uSelector].r);
                residual.g = Quantized[n].a * (target.g - Palette[uSelector].g);
                residual.b = Quantized[n].a * (target.b - Palette[uSelector].b);
                residual.a = 0.0f;

                SpreadError(n, residual);
            }
        }

        pBC->bitmap = dwSelectors;
    }