Metrics StarSpace::evaluateOne()

in src/starspace.cpp [336:395]

Evaluates a single example: the projected LHS is scored against the true RHS and against every basedoc candidate, the rank of the true label among those candidates is recorded in the returned Metrics, and the top-K scoring predictions are written to pred (optionally excluding labels that already appear in the LHS).


Metrics StarSpace::evaluateOne(
    const vector<Base>& lhs,
    const vector<Base>& rhs,
    vector<Predictions>& pred,
    bool excludeLHS) {

  std::priority_queue<Predictions> heap;
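  // Max-heap ordered by score; index 0 marks the true label, i + 1 marks basedoc i.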

  auto lhsM = model_->projectLHS(lhs);
  auto rhsM = model_->projectRHS(rhs);
  // Our evaluation function currently assumes there is only one correct label.
  // TODO: generalize this to the multilabel case.
  auto score = model_->similarity(lhsM, rhsM);

  int rank = 1;
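  // The true label starts at rank 1; its score is pushed with sentinel index 0.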
  heap.push({ score, 0 });

  for (unsigned int i = 0; i < baseDocVectors_.size(); i++) {
    // If basedoc labels are not provided, all labels act as basedoc,
    // and we skip the correct label when comparing.
    if ((args_->basedoc.empty()) && ((int)i == rhs[0].first - dict_->nwords())) {
      continue;
    }
    auto cur_score = model_->similarity(lhsM, baseDocVectors_[i]);
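    // A candidate that strictly beats the true score pushes the true label down one rank;
    // exact ties are broken by a fair coin flip.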
    if (cur_score > score) {
      rank++;
    } else if (cur_score == score) {
      float flip = (float) rand() / RAND_MAX;
      if (flip > 0.5) {
        rank++;
      }
    }
    heap.push({ cur_score, i + 1 });
  }

  // get the first K predictions
  int i = 0;
  while (i < args_->K && heap.size() > 0) {
    Predictions heap_top = heap.top();
    heap.pop();

    bool keep = true;
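    // When all labels serve as basedoc, drop predictions whose label already appears in the LHS input.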
    if(excludeLHS && (args_->basedoc.empty())) {
      int nwords = dict_->nwords();
      auto it = std::find_if( lhs.begin(), lhs.end(),
                             [&heap_top, &nwords](const Base& el){ return (el.first - nwords + 1) == heap_top.second;} );
      keep = it == lhs.end();
    }

    if(keep) {
      pred.push_back(heap_top);
      i++;
    }
  }

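  // Seed a fresh Metrics object with the rank achieved by the true label for this example.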
  Metrics s;
  s.clear();
  s.update(rank);
  return s;
}
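
The ranking rule above does not depend on the embedding model itself: a candidate that scores strictly higher than the true label pushes it down one rank, and an exact tie does so with probability 0.5. A minimal standalone sketch of that rule, using a hypothetical computeRank helper that is not part of StarSpace, might look like this:

#include <cstdlib>
#include <iostream>
#include <vector>

// Hypothetical helper: rank of trueScore among candidateScores, mirroring the
// strictly-greater / coin-flip-on-tie rule used in evaluateOne().
int computeRank(float trueScore, const std::vector<float>& candidateScores) {
  int rank = 1;
  for (float s : candidateScores) {
    if (s > trueScore) {
      rank++;
    } else if (s == trueScore) {
      // Break exact ties randomly instead of always counting them for or against.
      if ((float) rand() / RAND_MAX > 0.5) {
        rank++;
      }
    }
  }
  return rank;
}

int main() {
  // Two candidates beat the true score of 0.80, one ties with it, two lose.
  std::vector<float> candidates = {0.95f, 0.85f, 0.80f, 0.40f, 0.10f};
  std::cout << "rank = " << computeRank(0.80f, candidates) << std::endl;  // prints 3 or 4
  return 0;
}

The coin flip means a tied candidate outranks the true label half the time, so ties cost half a rank in expectation rather than always zero or always one.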