evaluation_pipeline/evaluation.py [157:175]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def run_traditional_eval(query_id, query, relevant_docs, retrieved_docs, retrieved_distances, k):
    row = {'query_id': query_id}
    row['query'] = query
    # calculate traditional IR metrics
    precision = calc_precision_at_k(relevant_docs, retrieved_docs, k)
    recall = calc_recall_at_k(relevant_docs, retrieved_docs, k)
    ndcg = calc_ndcg(relevant_docs, retrieved_docs, score_type='rank', retrieved_distances=retrieved_distances, k=k)
    reciprocal_rank = calc_reciprocal_rank(relevant_docs, retrieved_docs)
    average_precision = calc_average_precision(relevant_docs, retrieved_docs, k=k)

    # store in row
    row['retrieved_ids'] = retrieved_docs
    row['relevant_docs'] = relevant_docs
    row[f'precision@{k}'] = precision
    row[f'recall@{k}'] = recall
    row[f'ndcg@{k}'] = ndcg
    row['reciprocal_rank'] = reciprocal_rank
    row['average_precision'] = average_precision
    return row
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
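
The excerpt relies on metric helpers (calc_precision_at_k, calc_recall_at_k, calc_reciprocal_rank, calc_average_precision) defined elsewhere in the pipeline. The sketch below shows standard textbook definitions matching the call signatures used above; it is an illustrative assumption, not the repository's own implementation. calc_ndcg is omitted because its score_type and retrieved_distances handling is repo-specific.

# Illustrative sketch only -- standard IR metric definitions,
# not the pipeline's actual helper implementations.

def calc_precision_at_k(relevant_docs, retrieved_docs, k):
    # fraction of the top-k retrieved docs that are relevant
    top_k = retrieved_docs[:k]
    hits = sum(1 for doc in top_k if doc in relevant_docs)
    return hits / k if k else 0.0

def calc_recall_at_k(relevant_docs, retrieved_docs, k):
    # fraction of all relevant docs that appear in the top-k retrieved docs
    top_k = retrieved_docs[:k]
    hits = sum(1 for doc in top_k if doc in relevant_docs)
    return hits / len(relevant_docs) if relevant_docs else 0.0

def calc_reciprocal_rank(relevant_docs, retrieved_docs):
    # 1 / rank of the first relevant doc; 0 if none is retrieved
    for rank, doc in enumerate(retrieved_docs, start=1):
        if doc in relevant_docs:
            return 1.0 / rank
    return 0.0

def calc_average_precision(relevant_docs, retrieved_docs, k):
    # mean of precision@i over the ranks i (up to k) where a relevant doc appears
    hits, precision_sum = 0, 0.0
    for rank, doc in enumerate(retrieved_docs[:k], start=1):
        if doc in relevant_docs:
            hits += 1
            precision_sum += hits / rank
    denom = min(len(relevant_docs), k)
    return precision_sum / denom if denom else 0.0

With helpers like these in scope, a single call such as run_traditional_eval("q1", "example query", ["d3", "d7"], ["d7", "d1", "d3"], [0.12, 0.34, 0.41], k=3) (hypothetical values) returns one dict per query, ready to be collected into a results dataframe.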



