in gemmology.h [1066:1069]
void Write::operator()(xsimd::batch<float, Arch> result, size_t row_idx,
                       size_t col_idx, size_t col_size) {
  // Writes one batch of float results into the buffer behind `output_addr`.
  // The destination element is output_addr[row_idx * col_size + col_idx],
  // i.e. `col_size` is used as the stride between consecutive rows.
  const auto row_begin = output_addr + row_idx * col_size;

  // NOTE(review): this is an aligned store, so the computed address
  // presumably has to satisfy the batch's alignment requirement -- confirm
  // that callers only pass row/column indices for which that holds.
  result.store_aligned(row_begin + col_idx);
}