fn encode_dxt_colors()

in image/src/codecs/dxt.rs [513:679]


fn encode_dxt_colors(source: &[u8], dest: &mut [u8]) {
    // sanity checks and determine stride when parsing the source data
    assert!((source.len() == 64 || source.len() == 48) && dest.len() == 8);
    let stride = source.len() / 16;

    // reference colors array
    let mut colors = [[0u8; 3]; 4];

    // Put the colors we're going to be processing in an array with pure RGB layout
    // note: we reverse the pixel order here. The reason for this is found in the inner quantization loop.
    let mut targets = [[0u8; 3]; 16];
    for (s, d) in source.chunks(stride).rev().zip(&mut targets) {
        *d = [s[0], s[1], s[2]];
    }

    // and a set of colors to pick from.
    let mut colorspace = targets.to_vec();

    // roundtrip all colors through the r5g6b5 encoding
    for rgb in &mut colorspace {
        *rgb = enc565_decode(enc565_encode(*rgb));
    }

    // and deduplicate the set of colors to choose from as the algorithm is O(N^2) in this
    colorspace.dedup();

    // in case of slight gradients it can happen that there's only one entry left in the color table.
    // as the resulting banding can be quite bad if we would just left the block at the closest
    // encodable color, we have a special path here that tries to emulate the wanted color
    // using the linear interpolation between gradients
    if colorspace.len() == 1 {
        // the base color we got from colorspace reduction
        let ref_rgb = colorspace[0];
        // the unreduced color in this block that's the furthest away from the actual block
        let mut rgb = targets
            .iter()
            .cloned()
            .max_by_key(|rgb| diff(*rgb, ref_rgb))
            .unwrap();
        // amplify differences by 2.5, which should push them to the next quantized value
        // if possible without overshoot
        for i in 0..3 {
            rgb[i] =
                ((i16::from(rgb[i]) - i16::from(ref_rgb[i])) * 5 / 2 + i16::from(ref_rgb[i])) as u8;
        }

        // roundtrip it through quantization
        let encoded = enc565_encode(rgb);
        let rgb = enc565_decode(encoded);

        // in case this didn't land us a different color the best way to represent this field is
        // as a single color block
        if rgb == ref_rgb {
            dest[0] = encoded as u8;
            dest[1] = (encoded >> 8) as u8;

            for d in dest.iter_mut().take(8).skip(2) {
                *d = 0;
            }
            return;
        }

        // we did find a separate value: add it to the options so after one round of quantization
        // we're done
        colorspace.push(rgb);
    }

    // block quantization loop: we basically just try every possible combination, returning
    // the combination with the least squared error
    // stores the best candidate colors
    let mut chosen_colors = [[0; 3]; 4];
    // did this index table use the [0,0,0] variant
    let mut chosen_use_0 = false;
    // error calculated for the last entry
    let mut chosen_error = 0xFFFF_FFFFu32;

    // loop through unique permutations of the colorspace, where c1 != c2
    'search: for (i, &c1) in colorspace.iter().enumerate() {
        colors[0] = c1;

        for &c2 in &colorspace[0..i] {
            colors[1] = c2;

            // what's inside here is ran at most 120 times.
            for use_0 in 0..2 {
                // and 240 times here.

                if use_0 != 0 {
                    // interpolate one color, set the other to 0
                    for i in 0..3 {
                        colors[2][i] =
                            ((u16::from(colors[0][i]) + u16::from(colors[1][i]) + 1) / 2) as u8;
                    }
                    colors[3] = [0, 0, 0];
                } else {
                    // interpolate to get 2 more colors
                    for i in 0..3 {
                        colors[2][i] =
                            ((u16::from(colors[0][i]) * 2 + u16::from(colors[1][i]) + 1) / 3) as u8;
                        colors[3][i] =
                            ((u16::from(colors[0][i]) + u16::from(colors[1][i]) * 2 + 1) / 3) as u8;
                    }
                }

                // calculate the total error if we were to quantize the block with these color combinations
                // both these loops have statically known iteration counts and are well vectorizable
                // note that the inside of this can be run about 15360 times worst case, i.e. 960 times per
                // pixel.
                let total_error = targets
                    .iter()
                    .map(|t| colors.iter().map(|c| diff(*c, *t) as u32).min().unwrap())
                    .sum();

                // update the match if we found a better one
                if total_error < chosen_error {
                    chosen_colors = colors;
                    chosen_use_0 = use_0 != 0;
                    chosen_error = total_error;

                    // if we've got a perfect or at most 1 LSB off match, we're done
                    if total_error < 4 {
                        break 'search;
                    }
                }
            }
        }
    }

    // calculate the final indices
    // note that targets is already in reverse pixel order, to make the index computation easy.
    let mut chosen_indices = 0u32;
    for t in &targets {
        let (idx, _) = chosen_colors
            .iter()
            .enumerate()
            .min_by_key(|&(_, c)| diff(*c, *t))
            .unwrap();
        chosen_indices = (chosen_indices << 2) | idx as u32;
    }

    // encode the colors
    let mut color0 = enc565_encode(chosen_colors[0]);
    let mut color1 = enc565_encode(chosen_colors[1]);

    // determine encoding. Note that color0 == color1 is impossible at this point
    if color0 > color1 {
        if chosen_use_0 {
            swap(&mut color0, &mut color1);
            // Indexes are packed 2 bits wide, swap index 0/1 but preserve 2/3.
            let filter = (chosen_indices & 0xAAAA_AAAA) >> 1;
            chosen_indices ^= filter ^ 0x5555_5555;
        }
    } else if !chosen_use_0 {
        swap(&mut color0, &mut color1);
        // Indexes are packed 2 bits wide, swap index 0/1 and 2/3.
        chosen_indices ^= 0x5555_5555;
    }

    // encode everything.
    dest[0] = color0 as u8;
    dest[1] = (color0 >> 8) as u8;
    dest[2] = color1 as u8;
    dest[3] = (color1 >> 8) as u8;
    for i in 0..4 {
        dest[i + 4] = (chosen_indices >> (i * 8)) as u8;
    }
}