in ms_marco_eval.py [0:0]
def load_candidate_from_stream(f):
    """Load ranked candidate passages from a tab-separated stream.

    Each line must hold at least three tab-separated integer fields, in the
    order ``qid``, ``pid``, ``rank``; any further fields are ignored and
    surrounding whitespace on the line is stripped. ``rank`` is 1-based and
    must lie in ``1..1000``. If the same (qid, rank) pair appears more than
    once, the last occurrence wins.

    Args:
        f (stream): iterable of text lines to load.

    Returns:
        qid_to_ranked_candidate_passages (dict): dictionary mapping from
        query_id (int) to a list of 1000 passage ids (int). Index
        ``rank - 1`` holds the passage given for that rank; ranks that were
        never given stay 0.

    Raises:
        IOError: if a line has fewer than three fields, one of the first
            three fields is not an integer, or the rank is outside
            ``1..1000``.
    """
    num_ranks = 1000
    qid_to_ranked_candidate_passages = {}
    for line in f:
        stripped = line.strip()
        fields = stripped.split('\t')
        # Only the parsing is guarded, and only for the errors it can
        # legitimately produce, so interrupts and genuine programming errors
        # are not disguised as a data-format problem.
        try:
            qid = int(fields[0])
            pid = int(fields[1])
            rank = int(fields[2])
        except (ValueError, IndexError):
            raise IOError('\"%s\" is not valid format' % stripped)
        # Reject out-of-range ranks explicitly: a rank of 0 or below would
        # otherwise turn into a negative index and silently overwrite a slot
        # counted from the end of the list.
        if not 1 <= rank <= num_ranks:
            raise IOError('\"%s\" is not valid format' % stripped)
        if qid not in qid_to_ranked_candidate_passages:
            # By default, all PIDs in the list of 1000 are 0. Only override
            # those that are given.
            qid_to_ranked_candidate_passages[qid] = [0] * num_ranks
        qid_to_ranked_candidate_passages[qid][rank - 1] = pid
    return qid_to_ranked_candidate_passages