in utils_ranking.py [0:0]
def _create_examples(self, all_data, set_type, num_cand):
    """Build one list of InputExample objects per question for ranking.

    For every entry in ``all_data`` each candidate is turned into an
    ``InputExample`` whose ``text_a`` is the question and whose ``text_b`` is
    the candidate's article title and text joined by the literal ``[title]``
    marker.

    Args:
        all_data: Sized iterable of dicts. Each dict is read for the keys
            ``'question'`` and ``'candidates'``; every candidate is read for
            ``'article_title'``, ``'text'`` and ``'judge'`` (either a plain
            label or a dict holding ``'judge_contain_some'``).
        set_type: Prefix used when building each example's guid.
        num_cand: Exact number of candidates every entry must provide.

    Returns:
        A list with one inner list per entry of ``all_data``; each inner list
        holds ``num_cand`` ``InputExample`` objects in candidate order.

    Raises:
        AssertionError: If the first candidate is not labelled 1, if any
            other candidate is not labelled 0, or if an entry does not have
            exactly ``num_cand`` candidates.
    """
    examples = []
    for idx_i, data in tqdm(enumerate(all_data), total=len(all_data), desc="Creating examples"):
        # The question is shared by every candidate of this entry.
        text_a = data['question']
        batch_examples = []
        for idx_j, line in enumerate(data['candidates']):
            guid = f"{set_type}-example_{idx_i}_index_{idx_j}"  # e.g. train-example_1_index_7
            text_b = line['article_title'] + '[title]' + line['text']
            judge = line['judge']
            label = judge['judge_contain_some'] if isinstance(judge, dict) else judge

            # The positive candidate must come first and all others must be
            # negatives. Raise explicitly (instead of a bare ``assert`` or a
            # debugger breakpoint) so the check survives ``python -O`` and
            # never blocks a non-interactive run.
            expected_label = 1 if idx_j == 0 else 0
            if label != expected_label:
                raise AssertionError(
                    f"{guid}: expected label {expected_label}, got {label!r}"
                )

            # NOTE(review): every example is stored with label=0 regardless of
            # the judged label; presumably this is the index of the positive
            # candidate within the group (always 0) - confirm with the consumer.
            batch_examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=0))

        if len(batch_examples) != num_cand:
            raise AssertionError(
                f"you need {num_cand} candidates, but you have {len(batch_examples)}"
            )
        # batch_examples is a fresh list on every iteration, so no copy is needed.
        examples.append(batch_examples)
    return examples