LogisticRegressorQualityEstimator::Matrix LogisticRegressorQualityEstimator::extractFeatures()

in inference/src/translator/quality_estimator.cpp [187:239]


// Builds the per-word feature matrix from subword log probabilities.
//
// wordIndices: one half-open subword range [begin, end) per word; each range indexes into logProbs.
// logProbs:    log probability of each subword. No bounds checking is done here, so every index covered
//              by a range must be valid for logProbs.
//
// Returns a matrix with one row per entry of wordIndices and 4 columns (mean, min, number of subwords,
// overall mean). A 0x0 matrix is returned when wordIndices is empty or when every range is empty.
// Rows that belong to an empty range only get their overall-mean column written; their other columns keep
// whatever value Matrix gives a freshly constructed cell.
LogisticRegressorQualityEstimator::Matrix LogisticRegressorQualityEstimator::extractFeatures(
    const std::vector<SubwordRange>& wordIndices, const std::vector<float>& logProbs) const {
  if (wordIndices.empty()) {
    // Returning the prvalue directly lets the compiler elide the copy/move entirely.
    return Matrix(0, 0);
  }

  // The number of features (numFeatures), which currently must be 4
  Matrix features(wordIndices.size(), /*numFeatures =*/4);

  // Column layout of one feature row:
  // I_MEAN = index position in the feature vector that represents the mean of log probability of a given word
  // I_MIN = index position in the feature vector that represents the minimum of log probability of a given word
  // I_NUM_SUBWORDS = index position in the feature vector that represents the number of subwords that compose a
  // given word
  // I_OVERALL_MEAN = index position in the feature vector that represents the overall log probability score in the
  // entire sequence
  const size_t I_MEAN{0};
  const size_t I_MIN{1};
  const size_t I_NUM_SUBWORDS{2};
  const size_t I_OVERALL_MEAN{3};

  float overallMean = 0.0f;
  size_t numLogProbs = 0;
  size_t featureRow = 0;

  for (const SubwordRange& wordRange : wordIndices) {
    // An empty word still occupies a row so that row indices stay aligned with wordIndices.
    if (wordRange.begin == wordRange.end) {
      ++featureRow;
      continue;
    }

    float minScore = std::numeric_limits<float>::max();

    for (size_t i = wordRange.begin; i < wordRange.end; ++i) {
      ++numLogProbs;
      overallMean += logProbs[i];
      // The mean cell is used as the running sum first and normalised once the word is finished.
      features.at(featureRow, I_MEAN) += logProbs[i];

      minScore = std::min<float>(logProbs[i], minScore);
    }

    const float numSubwords = static_cast<float>(wordRange.size());
    features.at(featureRow, I_MEAN) /= numSubwords;
    features.at(featureRow, I_MIN) = minScore;
    features.at(featureRow, I_NUM_SUBWORDS) = numSubwords;

    ++featureRow;
  }

  if (numLogProbs == 0) {
    return Matrix(0, 0);
  }

  // NOTE(review): the divisor is the end index of the last range, not numLogProbs. The two agree only when
  // the ranges tile [0, end) without gaps or overlaps - confirm against the callers / the trained model
  // before changing it.
  overallMean /= static_cast<float>(wordIndices.back().end);

  for (size_t row = 0; row < features.rows; ++row) {
    features.at(row, I_OVERALL_MEAN) = overallMean;
  }

  // Plain return of the local: eligible for NRVO, otherwise moved implicitly.
  return features;
}