inline int ReQuantizeForFloat::f()

in include/fbgemm/OutputProcessing-inl.h [195:301]


inline int ReQuantizeForFloat<FUSE_RELU, Q_GRAN, outT, inT, nextOPType>::f(
    outT* out,
    inT* inp,
    const block_type_t& block,
    int ld_out,
    int ld_in) const {
  static_assert(
      std::is_same<int32_t, inT>::value,
      "input data type is of not expected type");
  static_assert(
      std::is_same<float, outT>::value,
      "output data type is of not expected type");
  int ncol_per_group = ncols_ / groups_;
  assert(
      block.col_size <= ncol_per_group &&
      "ReQuantizeOutput should be called at most 1 group at a time.");
  int g = block.col_start / ncol_per_group;
  if (instSet == inst_set_t::anyarch || !std::is_same<outT, float>::value) {
    for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
      for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
        inT raw = inp[(i - block.row_start) * ld_in + j - block.col_start];
        if (Aq_zero_point_) {
          raw -= Aq_zero_point_ * q_col_offsets_[j];
        }
        int Bq_zero_point_idx;
        if (Q_GRAN == QuantizationGranularity::TENSOR) {
          Bq_zero_point_idx = 0;
        } else if (Q_GRAN == QuantizationGranularity::GROUP) {
          Bq_zero_point_idx = g;
        } else if (Q_GRAN == QuantizationGranularity::OUT_CHANNEL) {
          Bq_zero_point_idx = j;
        } else {
          assert(false && "unknown quantization granularity");
        }
        if (q_row_offsets_) {
          raw -= q_row_offsets_[i - block.row_start] *
              Bq_zero_point_[Bq_zero_point_idx];
        }
        float res = raw * Aq_scale_ * Bq_scale_[Bq_zero_point_idx];
        if (bias_) {
          res += bias_[j];
        }
        out[i * ld_out + j] = res;
        if (FUSE_RELU) {
          out[i * ld_out + j] = std::max<outT>(0.0f, out[i * ld_out + j]);
        }
      }
    }
  } else if (instSet == inst_set_t::avx2 || instSet == inst_set_t::avx512) {
    bool b_symmetric =
        (Q_GRAN == QuantizationGranularity::TENSOR && Bq_zero_point_[0] == 0) ||
        q_row_offsets_ == nullptr;

    requantizationForFloatParams_t r = {
        Aq_zero_point_,
        Bq_zero_point_,
        Aq_scale_,
        Bq_scale_,
        q_row_offsets_,
        q_col_offsets_,
        bias_,
        ncols_,
        groups_};

    if (Aq_zero_point_ == 0) {
      if (b_symmetric) {
        if (bias_ == nullptr) {
          requantizeForFloatAvx2<true, true, Q_GRAN, false, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        } else {
          requantizeForFloatAvx2<true, true, Q_GRAN, true, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        }
      } else {
        if (bias_ == nullptr) {
          requantizeForFloatAvx2<true, false, Q_GRAN, false, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        } else {
          requantizeForFloatAvx2<true, false, Q_GRAN, true, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        }
      }
    } else {
      if (b_symmetric) {
        if (bias_ == nullptr) {
          requantizeForFloatAvx2<false, true, Q_GRAN, false, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        } else {
          requantizeForFloatAvx2<false, true, Q_GRAN, true, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        }
      } else {
        if (bias_ == nullptr) {
          requantizeForFloatAvx2<false, false, Q_GRAN, false, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        } else {
          requantizeForFloatAvx2<false, false, Q_GRAN, true, FUSE_RELU>(
              out, inp, block, ld_out, ld_in, r);
        }
      }
    }
  } else {
    assert(0 && "Not supported yet");
  }

  return nextop_.template f<instSet>(out, out, block, ld_out, ld_out);
}