in finetune/run_classifier_azureml.py [0:0]
def run_evaluation(processor, output_mode, set_type):
    examples = processor.get_dev_examples(args.data_dir) if set_type == "dev" else processor.get_test_examples(args.data_dir)
    features = convert_examples_to_features(
        examples, processor.get_labels(), args.max_seq_length, tokenizer, output_mode)
    logger.info(f"  Running Evaluation on {set_type}")
    logger.info("  Num examples = %d", len(examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor(
        [f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor(
        [f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor(
        [f.segment_ids for f in features], dtype=torch.long)
    all_label_ids = None
    if output_mode == "classification":
        all_label_ids = torch.tensor(
            [f.label_id for f in features], dtype=torch.long)
    elif output_mode == "regression":
        all_label_ids = torch.tensor(
            [f.label_id for f in features], dtype=torch.float)
    data = TensorDataset(
        all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    # Run prediction for the full data set.
    eval_sampler = SequentialSampler(data)
    dataloader = DataLoader(
        data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model.eval()
    eval_loss = 0
    nb_eval_steps = 0
    preds = []
    for input_ids, input_mask, segment_ids, label_ids in tqdm(dataloader, desc="Evaluating"):
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_ids = label_ids.to(device)

        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask, labels=None)

        # Create the eval loss and other metrics required by the task.
        # Only the dev split has gold labels; when focal loss is selected for
        # classification, no eval loss is accumulated here.
        if set_type == "dev":
            tmp_eval_loss = None
            if output_mode == "classification":
                if not args.focal:
                    loss_fct = CrossEntropyLoss()
                    tmp_eval_loss = loss_fct(
                        logits.view(-1, num_labels), label_ids.view(-1))
            elif output_mode == "regression":
                loss_fct = MSELoss()
                if args.fp16:
                    label_ids = label_ids.half()
                tmp_eval_loss = loss_fct(
                    logits.view(-1), label_ids.view(-1))
            if tmp_eval_loss is not None:
                eval_loss += tmp_eval_loss.mean().item()
                nb_eval_steps += 1

        # Accumulate logits for every batch so predictions cover all examples,
        # regardless of whether a loss was computed for this batch.
        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(
                preds[0], logits.detach().cpu().numpy(), axis=0)
    preds = preds[0]
    if output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif output_mode == "regression":
        preds = np.squeeze(preds)
if set_type == "dev":
eval_loss = eval_loss / nb_eval_steps
result = compute_metrics(task_name, preds, all_label_ids.numpy())
loss = tr_loss/nb_tr_steps if args.do_train else None
result['eval_loss'] = eval_loss
result['global_step'] = global_step
result['loss'] = loss
logger.info("***** Evaluation results *****")
for key in sorted(result.keys()):
logger.info("Epoch %s: %s = %s", epoch_num,
key, str(result[key]))
if(epoch_num ==2):
run.log(key, str(result[key]))
if set_type == "test":
output_eval_file = os.path.join(args.output_dir, f"{task_name.upper()}-{args.seed}-{args.learning_rate}-ep-{epoch_num}-tot-epochs-{args.num_train_epochs}.tsv")
with open(output_eval_file, "w") as writer:
writer.write("index\tprediction\n")
for i, sample in enumerate(examples):
if output_mode == "classification":
writer.write(f"{sample.guid}\t{processor.get_labels()[preds[i].item()]}\n")
elif output_mode == "regression":
writer.write(f"{sample.guid}\t{preds[i].item()}\n")
return
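
The batching and no-grad inference pattern used in run_evaluation is generic. The sketch below is a minimal, self-contained illustration of the same pattern on synthetic tensors; the ToyClassifier, the tensor shapes, and num_labels=2 are illustrative assumptions and are not taken from run_classifier_azureml.py.

# Minimal sketch of the evaluation pattern above, on synthetic data.
# ToyClassifier, the shapes, and num_labels=2 are assumptions for illustration.
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset


class ToyClassifier(nn.Module):
    def __init__(self, vocab_size=100, hidden=16, num_labels=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden)
        self.classifier = nn.Linear(hidden, num_labels)

    def forward(self, input_ids, attention_mask):
        # Mean-pool the embedded tokens, ignoring padded positions.
        emb = self.embed(input_ids)
        mask = attention_mask.unsqueeze(-1).float()
        pooled = (emb * mask).sum(1) / mask.sum(1).clamp(min=1)
        return self.classifier(pooled)


input_ids = torch.randint(0, 100, (8, 10))            # 8 examples, seq length 10
attention_mask = torch.ones(8, 10, dtype=torch.long)  # no padding in this toy data
labels = torch.randint(0, 2, (8,))

dataset = TensorDataset(input_ids, attention_mask, labels)
loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=4)

model = ToyClassifier()
model.eval()
all_logits = []
with torch.no_grad():
    for batch_ids, batch_mask, _ in loader:
        all_logits.append(model(batch_ids, batch_mask).cpu().numpy())

# Concatenate per-batch logits, then argmax to get one class id per example.
preds = np.argmax(np.concatenate(all_logits, axis=0), axis=1)
accuracy = (preds == labels.numpy()).mean()
print(f"predictions: {preds.tolist()}, accuracy: {accuracy:.2f}")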