in math/src/fft/concurrent.rs [179:198]
/// Transposes a square matrix in place.
///
/// `matrix` is treated as a `size` x `size` matrix stored row by row, so the element at
/// (`row`, `col`) lives at index `row * size + col`. After the call, the element that was
/// at (`row`, `col`) is at (`col`, `row`).
///
/// The leading even-sized sub-matrix is processed in 2x2 blocks; when `size` is odd, the
/// remaining last row and last column are exchanged element by element afterwards.
///
/// # Panics
/// In debug builds, panics if `matrix.len() != size * size`. In any build, `slice::swap`
/// panics if an index is out of bounds, which can happen when `matrix` is shorter than
/// `size * size`.
fn transpose_square_1<T>(matrix: &mut [T], size: usize) {
    debug_assert_eq!(matrix.len(), size * size);

    // Largest even number not exceeding `size`; the 2x2 block pass below covers the
    // leading `even` x `even` sub-matrix.
    let even = size - size % 2;

    // iterate over the upper triangle, working in 2x2 blocks
    for row in (0..even).step_by(2) {
        // A block sitting on the diagonal only needs its two off-diagonal elements
        // exchanged; its diagonal elements stay in place.
        let diag = row * size + row;
        matrix.swap(diag + 1, diag + size);

        // Every block to the right of the diagonal is exchanged with its mirror image
        // below the diagonal, transposing the block contents at the same time.
        for col in (row + 2..even).step_by(2) {
            let upper = row * size + col;
            let lower = col * size + row;
            matrix.swap(upper, lower);
            matrix.swap(upper + 1, lower + size);
            matrix.swap(upper + size, lower + 1);
            matrix.swap(upper + size + 1, lower + size + 1);
        }
    }

    // For odd sizes the last row and last column are not covered by any 2x2 block;
    // mirror them across the diagonal one element at a time (the corner stays put).
    if even != size {
        let last = even;
        for k in 0..last {
            matrix.swap(k * size + last, last * size + k);
        }
    }
}