in gemmology.h [1007:1027]
/* Gather the selected columns of a prepared B matrix into `output`.
 *
 * input       source registers; columns are addressed in groups of 8 (see
 *             the start-pointer computation below).
 * output      destination registers, written sequentially.
 * rows_bytes  number of bytes in a row; divided by the register width to get
 *             the number of registers copied per selected column.
 * cols_begin, cols_end
 *             half-open range of column indices to select.
 *
 * The range is consumed eight indices at a time and the loop only stops when
 * cols_begin lands exactly on cols_end, so the number of indices must be a
 * multiple of 8.
 */
void SelectColumnsOfB(const xsimd::batch<int8_t, Arch> *input,
                      xsimd::batch<int8_t, Arch> *output,
                      size_t rows_bytes /* number of bytes in a row */,
                      const IntegerTy *cols_begin, const IntegerTy *cols_end) {
  using Register = xsimd::batch<int8_t, Arch>;
  constexpr size_t kGroup = 8;

  /* Registers to copy for each selected column. */
  const size_t registers_per_column = rows_bytes / Register::size;

  /* One read cursor per column of the group currently being gathered. */
  const Register *cursor[kGroup];

  while (cols_begin != cols_end) {
    /* Low three bits of the index pick the slot inside a group of 8; the
     * remaining bits (a multiple of 8) pick the group, scaled by the number
     * of registers per column. */
    for (size_t lane = 0; lane < kGroup; ++lane) {
      cursor[lane] = input + (cols_begin[lane] & 7) +
                     (cols_begin[lane] & ~7) * registers_per_column;
    }

    /* Emit one register from each of the 8 columns per pass; every cursor
     * then skips ahead by 8 registers to its next entry. */
    for (size_t remaining = registers_per_column; remaining != 0;
         --remaining) {
      for (const Register *&source : cursor) {
        *output = *source;
        ++output;
        source += 8;
      }
    }

    cols_begin += kGroup;
  }
}