void Engine::PrepareBTransposed()

in gemmology.h [1177:1198]


// Converts the float matrix `input` into int8 registers written to `output`,
// using QuantizeTile8::ConsecutiveWithWrapping with a broadcast `quant_mult`.
//
// Traversal: a (row, column) cursor (r, c) starts at (0, 0). Each step emits
// eight registers, one for each of the rows r .. r+7 at column offset c, then
// advances c by the register width (batch8::size). When c reaches or passes
// `cols`, the cursor drops by 8 rows and c is reduced by `cols`.
//
// Parameters:
//   input      - source floats, addressed as input[row * cols + col].
//   output     - destination buffer, reinterpreted as an array of batch8.
//                NOTE(review): presumably must be suitably aligned for batch8
//                and large enough for rows * cols int8 values - confirm
//                against callers; nothing is checked here.
//   quant_mult - scalar broadcast to every lane of the float batch `q`.
//   cols, rows - matrix dimensions. NOTE(review): rows is consumed in groups
//                of 8 and columns in register-width chunks; the exact
//                divisibility requirements are not asserted in this function
//                - confirm against the library's documented contract.
//
// An empty matrix (cols == 0 or rows == 0) writes nothing.
void Engine<Arch>::PrepareBTransposed(const float *input, int8_t *output,
                                      float quant_mult, size_t cols,
                                      size_t rows) {
  using batch8 = xsimd::batch<int8_t, Arch>;
  const size_t RegisterElemsInt = batch8::size;
  const size_t kColStride = 8;

  // With cols == 0 there is nothing to quantize. Without this guard the loop
  // below would still emit registers for rows > 0, and the wrap loop
  // `while (c >= cols)` could never terminate (c >= 0 always holds and
  // c -= 0 makes no progress).
  if (cols == 0)
    return;

  xsimd::batch<float, Arch> q(quant_mult);
  auto *output_it = reinterpret_cast<batch8 *>(output);
  size_t r = 0;
  size_t c = 0;
  while (r < rows) {
    // One register per row of the current 8-row group. `cols - c` is the
    // number of columns left in the row starting at offset c.
    for (size_t ri = 0; ri < 8; ++ri)
      *output_it++ = QuantizeTile8::ConsecutiveWithWrapping(
          q, input + (r + ri) * cols + c, cols - c, cols, 8);
    c += RegisterElemsInt;
    // A loop rather than a single `if`: when the register width is at least
    // twice `cols`, one advance crosses several 8-row groups at once.
    while (c >= cols) {
      r += kColStride;
      c -= cols;
    }
  }
}