in scripts/regression.py [0:0]
def eval_models(args, branch=None):
    if branch is not None:
        if os.system(f"git checkout {branch}") != 0:
            return {}, 0

    branch = branch or initial_branch

    start_time = time.time()
    results = {}

    for model in args.models:
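        # Pick the evaluation backend: known causal checkpoints run as hf-causal,
        # known seq2seq checkpoints as hf-seq2seq, anything else uses the --model flag.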
        model_type = (
            "hf-causal"
            if model in causal_models
            else "hf-seq2seq"
            if model in seq2seq_models
            else args.model
        )
        model_args = f"pretrained={model},{args.model_args}"
        # TODO: split_and_pad_windows in AutoSeq2SeqLM doesn't exist, #527
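        # Until that lands, seq2seq models skip the perplexity tasks.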
        tasks = (
            args.tasks
            if model in causal_models or model_type == "hf-causal"
            else list(filter(lambda task: task not in perplexity_tasks, args.tasks))
        )
        # TODO: OOM with auto for seq2seq models, also can OOM with llama
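        # Cap "auto" batch sizing at 64 for non-causal models to avoid the OOMs noted above.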
        batch_size = (
            args.batch_size
            if model in causal_models or model_type == "hf-causal"
            else 64
            if args.batch_size == "auto"
            else args.batch_size
        )
        output_path = (
            f"data/regression/{int(start_time)}-{branch}-{Path(model).name}.json"
        )
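        # Build the main.py CLI invocation; --limit is only appended when it is set.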
        command = (
            f"python3 main.py --model {model_type} --model_args {model_args} --tasks {','.join(tasks)} "
            f"--num_fewshot {args.num_fewshot}{'' if args.limit is None else f' --limit {args.limit}'} "
            f"--batch_size {batch_size} --no_cache --output_path {output_path}"
        )
        print(
            f"{'=' * 80}\nEvaluating {model} on {', '.join(tasks)} at {branch} with:\n\n{command}\n{'=' * 80}"
        )
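        # Run the evaluation; on a non-zero exit code, record an empty result set instead of failing.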
        ret = os.system(command)
        results[model] = (
            json.load(open(output_path, encoding="utf-8"))
            if ret == 0
            else {"results": {}}
        )

    end_time = time.time()
    return results, end_time - start_time
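
# Illustrative usage sketch (not part of the original file): comparing a feature branch
# against the checkout the script started from. `build_args()` and the branch name are
# hypothetical stand-ins; in the real script the argparse namespace and branch list come
# from its own CLI handling.
#
#     args = build_args()                          # namespace with models, tasks, model_args, ...
#     baseline, baseline_secs = eval_models(args)  # evaluate the current checkout
#     candidate, candidate_secs = eval_models(args, branch="my-feature")
#     os.system(f"git checkout {initial_branch}")  # restore the starting branch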