# train_reader.py
def evaluate(model, dataset, tokenizer, collator, opt):
    """Run greedy-decoding evaluation and return the exact-match score.

    Iterates the dataset sequentially (no shuffling, nothing dropped),
    generates an answer for every example, and scores it against the
    gold answers with exact match. The per-process mean is combined
    across workers via a weighted average before being returned.

    Args:
        model: seq2seq reader model (possibly wrapped in DataParallel/DDP).
        dataset: evaluation dataset; must expose ``get_example(idx)`` whose
            result carries an ``'answers'`` entry.
        tokenizer: tokenizer used to decode generated token ids.
        collator: batch collation callable producing
            ``(idx, _, _, context_ids, context_mask)`` tuples.
        opt: options object; ``per_gpu_batch_size`` is read here and the
            whole object is forwarded to the distributed averaging helper.

    Returns:
        float: exact-match score averaged over all evaluated examples
        (weighted across processes).
    """
    dataloader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=opt.per_gpu_batch_size,
        drop_last=False,
        num_workers=10,
        collate_fn=collator,
    )

    model.eval()
    # Unwrap DataParallel/DistributedDataParallel so .generate() is reachable.
    if hasattr(model, "module"):
        model = model.module

    scores = []
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            (idx, _, _, context_ids, context_mask) = batch

            generated = model.generate(
                input_ids=context_ids.cuda(),
                attention_mask=context_mask.cuda(),
                max_length=50,
            )

            for j, token_ids in enumerate(generated):
                prediction = tokenizer.decode(token_ids, skip_special_tokens=True)
                gold_answers = dataset.get_example(idx[j])['answers']
                scores.append(src.evaluation.ems(prediction, gold_answers))
                total += 1

    # Combine per-process means into a single score weighted by example count.
    exactmatch, total = src.util.weighted_average(np.mean(scores), total, opt)
    return exactmatch