eval_application() — defined in exec_application_eval.py

def eval_application(args):
    """Run the application evaluation end to end.

    Loads the model/tokenizer, generates a response for every instruction
    in the evaluation set, optionally saves the results as JSON, and
    finally computes the application score.

    Args:
        args: argparse-style namespace; must provide `model_name`,
            `eval_type`, and `save_result_dir` (may be falsy to skip
            saving).
    """
    # Load model & tokenizer; llama checkpoints need a dedicated loader.
    if 'llama' in args.model_name:
        model, tokenizer = load_llama_models_tokenizer(args)
    else:
        model, tokenizer = load_models_tokenizer(args)

    # Load the evaluation dataset (a pandas-like frame; see iterrows below).
    dataset = load_dataset(args.eval_type)

    # Run model inference and record each answer.
    responses = []
    for _, record in tqdm(dataset.iterrows(), total=len(dataset)):
        prompt = record['instruction']
        model_response, _ = model.chat(
            tokenizer,
            prompt,
            history=None,
        )
        responses.append(model_response)

    if args.save_result_dir:
        # Build the path only under the guard: the original joined with
        # `save_result_dir` unconditionally, which raises TypeError when
        # the directory is None even though saving is meant to be optional.
        result_path = os.path.join(
            args.save_result_dir,
            f"{args.model_name}_application_result.json",
        )
        dataset["model_response"] = responses
        os.makedirs(args.save_result_dir, exist_ok=True)
        dataset.to_json(result_path, orient='records', force_ascii=False)

    # Compute the application score from the saved/collected results.
    get_application_score(args)