in src/backend/vector/avx2/field.rs [213:234]
/// Permutes the eight 32-bit lanes of `x` according to `control`.
///
/// Every `Shuffle` variant maps to a fixed table of source-lane indices:
/// output lane `i` is taken from input lane `table[i]`. Judging by the
/// tables, the four logical elements are interleaved as A = lanes 0 and 2,
/// B = lanes 1 and 3, C = lanes 4 and 6, D = lanes 5 and 7, and a variant
/// name such as `DBBD` lists which element ends up in each of those four
/// positions.
fn shuffle_lanes(x: u32x8, control: Shuffle) -> u32x8 {
    use core::arch::x86_64::_mm256_permutevar8x32_epi32;

    // Source-lane index for each of the eight output lanes.
    let lane_indices: u32x8 = match control {
        Shuffle::AAAA => u32x8::new(0, 0, 2, 2, 0, 0, 2, 2),
        Shuffle::BBBB => u32x8::new(1, 1, 3, 3, 1, 1, 3, 3),
        Shuffle::CACA => u32x8::new(4, 0, 6, 2, 4, 0, 6, 2),
        Shuffle::DBBD => u32x8::new(5, 1, 7, 3, 1, 5, 3, 7),
        Shuffle::ADDA => u32x8::new(0, 5, 2, 7, 5, 0, 7, 2),
        Shuffle::CBCB => u32x8::new(4, 1, 6, 3, 4, 1, 6, 3),
        Shuffle::ABAB => u32x8::new(0, 1, 2, 3, 0, 1, 2, 3),
        Shuffle::BADC => u32x8::new(1, 0, 3, 2, 5, 4, 7, 6),
        Shuffle::BACD => u32x8::new(1, 0, 3, 2, 4, 5, 6, 7),
        Shuffle::ABDC => u32x8::new(0, 1, 2, 3, 5, 4, 7, 6),
    };

    // Because the index vector is a compile-time constant for each variant,
    // LLVM treats this as a shuffle-by-constants and is free to lower it to
    // a cheaper instruction than a fully general cross-lane permute.
    //
    // SAFETY: the intrinsic only operates on its two register operands; it
    // is `unsafe` because it requires AVX2 on the executing CPU.
    // NOTE(review): assumes this function is only reachable when AVX2 is
    // available — confirm against the gating of this backend.
    let permuted =
        unsafe { _mm256_permutevar8x32_epi32(x.into_bits(), lane_indices.into_bits()) };
    permuted.into_bits()
}