in gemmology.h [1089:1097]
// Writes a pair of int32 result batches to the output buffer as two
// consecutive float batches.
//
// The destination of the first batch is
//   output_addr + row_idx * col_size + col_idx
// and the second batch is stored immediately after it, offset by the number
// of lanes in one xsimd::batch<int32_t, Arch>.
//
// Each batch goes through xsimd::bitwise_cast<float> before being stored, so
// the 32-bit lanes are written out unchanged bit-for-bit rather than being
// numerically converted.
// NOTE(review): store_aligned presumably requires both destinations to meet
// the architecture's alignment; that guarantee has to come from the caller
// and is not visible here -- confirm.
void Write::operator()(
    std::tuple<xsimd::batch<int32_t, Arch>, xsimd::batch<int32_t, Arch>> result,
    size_t row_idx, size_t col_idx, size_t col_size) {
  // Number of 32-bit lanes in one batch; also the distance between the two
  // stores.
  constexpr auto kLanes = xsimd::batch<int32_t, Arch>::size;

  // Address shared by both stores: start of the first output batch.
  const auto first_dst = output_addr + row_idx * col_size + col_idx;

  const auto first_half = xsimd::bitwise_cast<float>(std::get<0>(result));
  first_half.store_aligned(first_dst);

  const auto second_half = xsimd::bitwise_cast<float>(std::get<1>(result));
  second_half.store_aligned(first_dst + kLanes);
}