in genai-on-vertex-ai/gemini/model_upgrades/text_classification/vertex_script/eval.py [0:0]
def run_eval(experiment_name: str, baseline_model: str, candidate_model: str, prompt_template_local_path: str, dataset_local_path: str):
    """Evaluate a baseline and a candidate model on the same classification dataset.

    Runs two Vertex AI EvalTask evaluations (one per model) under the same
    experiment, using a shared prompt template and a custom case-insensitive
    accuracy metric, then prints the mean accuracy of each run.

    Args:
        experiment_name: Vertex AI experiment that groups both eval runs.
        baseline_model: Model ID of the current (baseline) model.
        candidate_model: Model ID of the upgrade candidate model.
        prompt_template_local_path: Path to the prompt template text file.
        dataset_local_path: Path to the local evaluation dataset.
    """
    # Shared timestamp so both runs sort together in the experiment UI.
    timestamp = datetime.now().strftime('%b-%d-%H-%M-%S').lower()
    # Use a context manager so the file handle is closed promptly;
    # read as UTF-8 explicitly rather than relying on the platform default.
    with open(prompt_template_local_path, encoding="utf-8") as f:
        prompt_template = f.read()
    task = EvalTask(
        dataset=dataset_local_path,
        metrics=[CustomMetric(name="accuracy", metric_function=case_insensitive_match)],
        experiment=experiment_name
    )
    # Run names embed the model ID; dots are invalid in run names, hence the replace.
    baseline_results = task.evaluate(
        experiment_run_name=f"{timestamp}-{baseline_model.replace('.', '-')}",
        prompt_template=prompt_template,
        model=GenerativeModel(baseline_model)
    )
    candidate_results = task.evaluate(
        experiment_run_name=f"{timestamp}-{candidate_model.replace('.', '-')}",
        prompt_template=prompt_template,
        model=GenerativeModel(candidate_model)
    )
    print("Baseline model accuracy:", baseline_results.summary_metrics["accuracy/mean"])
    print("Candidate model accuracy:", candidate_results.summary_metrics["accuracy/mean"])