3-4o_mini_fine_tuning/3_eval_fine_tune.py (56 lines of code) (raw):

# %%
"""Generate a promptfoo eval file for the fine-tuned chat-triage model.

Reads the held-out test CSV and writes ./evals/<test_name>.yaml, comparing
three baseline OpenAI chat models against a fine-tuned gpt-4o-mini
checkpoint. Each CSV row becomes one promptfoo test case whose answer is
checked by an external Python assert script.
"""
import os

import pandas as pd
import yaml

# Load the held-out test dataset (one row per eval case; must contain
# "prompt" and "correct_output" columns).
df = pd.read_csv("./data/customer_service_chat_triage_n=100_test.csv")

# The eval name encodes the number of test cases for traceability.
test_name = f"customer_service_chat_triage_ft_n={len(df)}"
ft_model_name = "ft:gpt-4o-mini-2024-07-18:openai-internal::9rMH8pFU"

# NOTE: the per-provider prompt lists are deliberately separate literals —
# a single shared list object would make the YAML dumper emit
# anchors/aliases instead of plain repeated lists.
output_dict = {
    "description": test_name,
    "prompts": [
        {
            "id": "../prompts/verbatim_input.txt",
            "label": "verbatim_input",
        },
        {
            "id": "../prompts/cot3.json",
            "label": "cot3",
        },
    ],
    "providers": [
        # Baselines run both prompt styles; the fine-tuned model runs only
        # the chain-of-thought prompt it was trained on.
        {
            "id": "openai:chat:gpt-3.5-turbo",
            "prompts": ["verbatim_input", "cot3"],
        },
        {
            "id": "openai:chat:gpt-4o-mini",
            "prompts": ["verbatim_input", "cot3"],
        },
        {
            "id": "openai:chat:gpt-4o",
            "prompts": ["verbatim_input", "cot3"],
        },
        {
            "id": f"openai:chat:{ft_model_name}",
            "prompts": ["cot3"],
        },
    ],
    "tests": [
        {
            "vars": {
                "input": row["prompt"],
                "target": row["correct_output"],
            },
            "assert": [
                {
                    "type": "python",
                    "value": "file://../py/assert_last_line_answer.py",
                },
            ],
        }
        for _, row in df.iterrows()
    ],
}

os.makedirs("./evals", exist_ok=True)
# safe_dump: same output as dump for plain str/list/dict data, but refuses
# to emit Python-specific object tags. Explicit encoding keeps the output
# file UTF-8 regardless of platform default.
with open(f"./evals/{test_name}.yaml", "w", encoding="utf-8") as file:
    yaml.safe_dump(output_dict, file, default_flow_style=False)
print(f"Saved promptfoo file to {test_name}.yaml")