in src/starspace.cpp [336:395]
// Evaluates a single (lhs, rhs) example against the candidate set.
//
// The similarity between the projected lhs and the projected rhs is compared
// with the similarity between the projected lhs and every vector in
// baseDocVectors_. The rank starts at 1 and grows by one for each candidate
// that scores strictly higher; a candidate with an equal score is counted
// as ahead only when a rand()-based coin flip exceeds 0.5.
//
// Parameters:
//   lhs        - input side of the example.
//   rhs        - the correct output; only one correct label is assumed.
//   pred       - receives (appended) up to args_->K entries popped from the
//                heap in priority order. An entry's second field is 0 for
//                the rhs passed in, and i + 1 for baseDocVectors_[i].
//   excludeLHS - when true and args_->basedoc is empty, candidates whose
//                index corresponds to an lhs entry (el.first - nwords + 1)
//                are left out of pred.
//
// Returns a Metrics object that was cleared and then updated with the rank.
//
// NOTE(review): Predictions is presumably a (score, index) pair, so that
// std::priority_queue yields the highest score first -- confirm against its
// declaration.
Metrics StarSpace::evaluateOne(
    const vector<Base>& lhs,
    const vector<Base>& rhs,
    vector<Predictions>& pred,
    bool excludeLHS) {
  std::priority_queue<Predictions> heap;

  auto lhsM = model_->projectLHS(lhs);
  auto rhsM = model_->projectRHS(rhs);
  // Our evaluation function currently assumes there is only one correct label.
  // TODO: generalize this to the multilabel case.
  auto score = model_->similarity(lhsM, rhsM);

  int rank = 1;
  // Index 0 is reserved for the ground-truth entry.
  heap.push({ score, 0 });

  // Loop-invariant lookups, evaluated once instead of once per candidate.
  // In the case basedoc labels are not provided, all labels become basedoc.
  const bool labelsAreBasedocs = args_->basedoc.empty();
  const int nwords = labelsAreBasedocs ? dict_->nwords() : 0;
  // Guard against an empty rhs before reading rhs[0].
  const bool skipTruth = labelsAreBasedocs && !rhs.empty();
  const int truthIdx = skipTruth ? rhs[0].first - nwords : 0;

  for (unsigned int i = 0; i < baseDocVectors_.size(); i++) {
    // The correct label is already on the heap (index 0), so it is skipped
    // here rather than being compared against itself.
    if (skipTruth && static_cast<int>(i) == truthIdx) {
      continue;
    }
    auto cur_score = model_->similarity(lhsM, baseDocVectors_[i]);
    if (cur_score > score) {
      rank++;
    } else if (cur_score == score) {
      // Break ties randomly.
      float flip = static_cast<float>(rand()) / RAND_MAX;
      if (flip > 0.5) {
        rank++;
      }
    }
    heap.push({ cur_score, i + 1 });
  }

  // get the first K predictions
  int kept = 0;
  while (kept < args_->K && !heap.empty()) {
    Predictions heap_top = heap.top();
    heap.pop();

    bool keep = true;
    // Index 0 is the ground-truth entry, not a basedoc candidate, so it is
    // never subject to LHS exclusion. Without this check an lhs entry whose
    // id equals nwords - 1 maps to index 0 and would wrongly remove the
    // ground truth from the predictions.
    if (excludeLHS && labelsAreBasedocs && heap_top.second != 0) {
      keep = std::none_of(
          lhs.begin(), lhs.end(),
          [&heap_top, nwords](const Base& el) {
            return (el.first - nwords + 1) == heap_top.second;
          });
    }
    if (keep) {
      pred.push_back(heap_top);
      kept++;
    }
  }

  Metrics s;
  s.clear();
  s.update(rank);
  return s;
}