in genai-on-vertex-ai/gemini/evals_playbook/utils/evals_playbook.py [0:0]
    def log_eval_run(self,
                     experiment_run_id: str,
                     experiment,
                     eval_result,
                     run_path,
                     tags=None,
                     metadata=None):
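        """Log a single evaluation run: per-row details plus summary metrics.

        Args:
            experiment_run_id: Unique identifier for this run.
            experiment: An `Experiment` instance, or a dict of its fields.
            eval_result: A `vertexai.evaluation.EvalResult` returned by an
                evaluation task.
            run_path: Base path (e.g. a GCS prefix) under which prompts are
                saved via `save_prompt`.
            tags: Optional list of tags attached to the logged records.
            metadata: Optional dict of extra metadata, stored as JSON.
        """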
        # Normalize optional arguments (avoids shared mutable defaults).
        tags = tags if tags is not None else []
        metadata = metadata if metadata is not None else {}
        # Validate inputs before logging anything.
        if not isinstance(eval_result, EvalResult):
            raise TypeError(
                f"Invalid eval_result object. Expected `vertexai.evaluation.EvalResult`, got {type(eval_result)}."
            )
        if isinstance(experiment, dict):
            experiment = self.Experiment(**experiment)
        if not isinstance(experiment, self.Experiment):
            raise TypeError(
                f"Invalid experiment object. Expected `Experiment`, got {type(experiment)}."
            )
        # Get per-row records and aggregate metrics from the Rapid Eval evaluation task.
        detail_records = eval_result.metrics_table.to_dict(orient="records")
        summary_dict = eval_result.summary_metrics
        # Columns that hold inputs/outputs rather than metric values. 'prompt' is
        # included here so the raw prompt text is not serialized into the metrics JSON.
        non_metric_keys = ['prompt', 'context', 'reference', 'instruction',
                           'dataset_row_id', 'completed_prompt', 'response']
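        # Illustrative record shape (metric column names beyond those referenced
        # below are assumptions about EvalResult.metrics_table, not a verified schema):
        # {"dataset_row_id": "1", "prompt": "...", "response": "...",
        #  "reference": "...", "instruction": "...", "groundedness/score": 0.9}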
        # Prepare one run_detail record per dataset row.
        run_details = []
        for row in detail_records:
            metrics = {k: v for k, v in row.items() if k not in non_metric_keys}
            now = datetime.datetime.now()
            run_detail = dict(
                run_id=experiment_run_id,
                experiment_id=experiment.experiment_id,
                task_id=experiment.task_id,
                dataset_row_id=row.get("dataset_row_id"),
                system_instruction=row.get("instruction"),
                input_prompt_gcs_uri=self.save_prompt(row.get("prompt"), run_path, row.get("dataset_row_id")),
                output_text=row.get("response"),
                ground_truth=row.get("reference"),
                metrics=json.dumps(metrics),
                # additional fields
                latencies=[],
                create_datetime=now,
                update_datetime=now,
                tags=tags,
                metadata=json.dumps(metadata) if isinstance(metadata, dict) else None,
            )
            run_details.append(run_detail)
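        # `_upsert` is defined elsewhere in this class; from its usage here it is
        # assumed to insert-or-update rows in the named playbook table.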
        try:
            self._upsert("run_details", run_details)
        except Exception:
            print("Failed to log run details due to the following error.")
            raise
        # Prepare run-level summary metrics.
        run_summary = dict(
            run_id=experiment_run_id,
            experiment_id=experiment.experiment_id,
            task_id=experiment.task_id,
            metrics=json.dumps(summary_dict),
            # additional fields
            create_datetime=datetime.datetime.now(),
            update_datetime=datetime.datetime.now(),
            tags=tags,
            metadata=json.dumps(metadata) if isinstance(metadata, dict) else None,
        )
        try:
            self._upsert("runs", run_summary)
        except Exception:
            print("Failed to log run summary due to the following error.")
            raise
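
# --- Usage sketch (illustrative only) ---
# A minimal example of how log_eval_run might be called. The EvalTask usage
# follows the public `vertexai.evaluation` API; the experiment object, run id,
# bucket path, and metric name below are hypothetical.
#
#   from vertexai.evaluation import EvalTask
#
#   eval_result = EvalTask(
#       dataset=eval_dataset,            # pandas DataFrame incl. a "response" column
#       metrics=["groundedness"],
#   ).evaluate()
#
#   evals.log_eval_run(
#       experiment_run_id="run-001",
#       experiment=experiment,           # Experiment instance or dict of its fields
#       eval_result=eval_result,         # vertexai.evaluation.EvalResult
#       run_path="gs://my-bucket/evals/run-001",
#       tags=["baseline"],
#       metadata={"model": "gemini-1.5-pro"},
#   )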