in test.py [0:0]
def run(logger, task, metaicl_data, metaicl_model, train_data, dev_data, seed,
        checkpoint, is_classification, add_newlines):
    """Evaluate one task: tensorize the data, compute (or load cached)
    per-option losses, optionally apply null-input calibration, and return
    the evaluation metric.

    Reads configuration from the module-level ``args`` namespace. Returns
    ``None`` when ``args.is_null`` (a bias-only pass whose cached losses are
    consumed later by the calibration branch); otherwise returns the value of
    ``metaicl_data.evaluate(...)``.
    """
    # Build the cache filename for this run's losses. The zero-shot variant
    # appends "-null" for null-input passes and "-no-newlines" when newlines
    # are disabled; the meta-trained variant requires newlines.
    if args.do_zeroshot:
        split_tag = args.split + ("-null" if args.is_null else "")
        fname = "{}-{}-{}{}{}{}.pkl".format(
            task,
            split_tag,
            metaicl_data.method,
            "-k={}".format(args.k) if args.use_demonstrations else "",
            "-s={}".format(seed) if args.use_demonstrations else "",
            "" if add_newlines else "-no-newlines")
    else:
        assert add_newlines
        fname = "{}-{}-{}{}{}.pkl".format(
            task,
            args.split,
            metaicl_data.method,
            "-k={}".format(args.k) if args.use_demonstrations else "",
            "-s={}".format(seed) if args.use_demonstrations else "")
    cache_path = os.path.join(args.out_dir, fname)

    metaicl_data.tensorize(train_data, dev_data, add_newlines=add_newlines)
    metaicl_data.print_tensorized_example()
    logger.info(cache_path)

    # Reuse cached losses when present; otherwise lazily load the model,
    # run inference, and persist the result for future runs.
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as fin:
            losses = pkl.load(fin)
    else:
        if metaicl_model.is_none():
            metaicl_model.load(checkpoint)
            metaicl_model.cuda()
            metaicl_model.eval()
        losses = metaicl_model.do_inference(metaicl_data, args.test_batch_size)
        with open(cache_path, "wb") as fout:
            pkl.dump(losses, fout)

    assert len(losses) == len(metaicl_data)

    # A null-input pass exists only to populate the bias cache.
    if args.is_null:
        return None

    if args.use_calibration:
        assert args.do_zeroshot
        # Derive the matching null-input cache path from this run's path;
        # that file must have been produced by a prior args.is_null run.
        bias_path = cache_path.replace(
            "/" + task + "-" + args.split,
            "/" + task + "-" + args.split + "-null")
        assert os.path.exists(bias_path), bias_path
        with open(bias_path, "rb") as fin:
            bias_losses = pkl.load(fin)
        losses = np.array(losses)
        bias_losses = np.array(bias_losses)
        assert losses.shape == bias_losses.shape
        # Calibrate by subtracting the null-input (content-free) losses.
        losses -= bias_losses

    predictions = metaicl_model.do_predict(metaicl_data, losses=losses)
    groundtruths = [dp["output"] for dp in dev_data]
    perf = metaicl_data.evaluate(predictions, groundtruths, is_classification)
    logger.info("Accuracy=%s" % perf)
    return perf