in train_retriever.py [0:0]
def evaluate(model, dataset, collator, opt):
    """Evaluate a retriever model over an entire dataset.

    Iterates sequentially over `dataset`, scores each batch with `model`,
    and aggregates retrieval metrics across all examples (and across
    distributed workers via `src.util.weighted_average`).

    Args:
        model: retriever returning `(_, _, scores, loss)` from its forward
            pass; may be wrapped in (Distributed)DataParallel.
        dataset: evaluation dataset yielding
            (idx, question_ids, question_mask, context_ids, context_mask,
            gold_score) batches once collated.
        collator: collate_fn for the DataLoader.
        opt: options namespace; uses `per_gpu_batch_size` and `n_context`.

    Returns:
        Tuple `(loss, inversions, avg_topk, idx_topk)` where `loss` is the
        size-weighted mean evaluation loss, `inversions` the mean inversion
        count, and `avg_topk`/`idx_topk` dicts of top-k metrics keyed by k.
    """
    sampler = SequentialSampler(dataset)
    dataloader = DataLoader(
        dataset,
        sampler=sampler,
        batch_size=opt.per_gpu_batch_size,
        drop_last=False,
        num_workers=10,
        collate_fn=collator,
    )
    model.eval()
    # Unwrap (Distributed)DataParallel so we call the underlying module.
    if hasattr(model, "module"):
        model = model.module
    total = 0
    eval_loss = []
    # Only track the top-k cutoffs that are valid for n_context passages.
    avg_topk = {k: [] for k in [1, 2, 5] if k <= opt.n_context}
    idx_topk = {k: [] for k in [1, 2, 5] if k <= opt.n_context}
    inversions = []
    with torch.no_grad():
        for batch in dataloader:
            (idx, question_ids, question_mask, context_ids, context_mask, gold_score) = batch
            _, _, scores, loss = model(
                question_ids=question_ids.cuda(),
                question_mask=question_mask.cuda(),
                passage_ids=context_ids.cuda(),
                passage_mask=context_mask.cuda(),
                gold_score=gold_score.cuda(),
            )
            # BUG FIX: accumulate the per-batch loss. Previously `eval_loss`
            # was never filled and the function returned only the loss of
            # the final batch (NameError on an empty dataloader).
            eval_loss.append(loss.item())
            src.evaluation.eval_batch(scores, inversions, avg_topk, idx_topk)
            total += question_ids.size(0)

    # Average each metric across workers, weighted by example count,
    # matching how the other metrics below are aggregated.
    loss = src.util.weighted_average(np.mean(eval_loss), total, opt)[0]
    inversions = src.util.weighted_average(np.mean(inversions), total, opt)[0]
    for k in avg_topk:
        avg_topk[k] = src.util.weighted_average(np.mean(avg_topk[k]), total, opt)[0]
        idx_topk[k] = src.util.weighted_average(np.mean(idx_topk[k]), total, opt)[0]
    return loss, inversions, avg_topk, idx_topk