in exec_application_eval.py [0:0]
import os

from tqdm import tqdm


def eval_application(args):
    # Load the model & tokenizer; LLaMA-family models use a dedicated loader
    if 'llama' in args.model_name:
        model, tokenizer = load_llama_models_tokenizer(args)
    else:
        model, tokenizer = load_models_tokenizer(args)

    # Load the evaluation set
    dataset = load_dataset(args.eval_type)

    # Run model inference on each record and collect the answers
    responses = []
    for _, record in tqdm(dataset.iterrows(), total=len(dataset)):
        prompt = record['instruction']
        model_response, _ = model.chat(
            tokenizer,
            prompt,
            history=None,
        )
        responses.append(model_response)

    # Save the responses alongside the original records; build the output
    # path only when a result directory was actually given
    if args.save_result_dir:
        result_path = os.path.join(
            args.save_result_dir, f"{args.model_name}_application_result.json"
        )
        dataset["model_response"] = responses
        os.makedirs(args.save_result_dir, exist_ok=True)
        # orient='records' writes one JSON object per dataset row
        dataset.to_json(result_path, orient='records', force_ascii=False)

    # Compute the application score
    get_application_score(args)
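
# A minimal entry-point sketch for invoking the function above. This is an
# assumption, not confirmed by the source: the flag names mirror the fields
# eval_application actually reads (model_name, eval_type, save_result_dir),
# and the default values are illustrative only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Run the application evaluation')
    parser.add_argument('--model_name', type=str, default='llama-7b')
    parser.add_argument('--eval_type', type=str, default='application')
    parser.add_argument('--save_result_dir', type=str, default='results')
    args = parser.parse_args()

    eval_application(args)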