# genai-on-vertex-ai/gemini/evals_playbook/utils/evals_playbook.py
# Module-level dependencies assumed from the surrounding file: `cfg` carries
# the project configuration and `BQ_TABLE_MAP` maps logical table names to
# BigQuery table names.
import ast

import pandas as pd
from google.cloud import bigquery


def compare_eval_runs(self, experiment_run_ids, as_dict=False):
    """Fetch one or more eval runs from BigQuery and return them side by side."""
    if not experiment_run_ids:
        raise ValueError("experiment_run_ids is required to compare runs")
    # Accept a single run id passed as a plain string.
    if isinstance(experiment_run_ids, str):
        experiment_run_ids = [experiment_run_ids]
    # Quote each run id and join them for the SQL IN clause below.
    run_ids_sql = ", ".join(f"'{run_id}'" for run_id in experiment_run_ids)
    table_prefix = f"{cfg.PROJECT_ID}.{cfg.BQ_DATASET_ID}"
    client = bigquery.Client(project=cfg.PROJECT_ID)
sql = f"""
SELECT
runs.task_id,
runs.run_id,
runs.experiment_id,
exp.experiment_desc,
exp.model_endpoint,
exp.model_name,
exp.generation_config,
prompt.prompt_template,
prompt.system_instruction,
runs.metrics,
runs.create_datetime
FROM
`{table_prefix}.{BQ_TABLE_MAP.get('runs').get('table_name')}` runs
JOIN
`{table_prefix}.{BQ_TABLE_MAP.get('experiments').get('table_name')}` exp
ON
runs.experiment_id = exp.experiment_id
LEFT JOIN
`{table_prefix}.{BQ_TABLE_MAP.get('prompts').get('table_name')}` prompt
ON
exp.prompt_id = prompt.prompt_id
WHERE runs.run_id IN ({experiment_run_ids})
ORDER BY runs.create_datetime DESC
"""
    df = client.query_and_wait(sql).to_dataframe()
    # The metrics and generation_config columns are stored as stringified
    # dicts; parse them back into Python objects. ast.literal_eval is a safer
    # stand-in for eval(), assuming the stored strings are plain dict literals.
    df['metrics'] = df['metrics'].apply(ast.literal_eval)
    df['generation_config'] = df['generation_config'].apply(ast.literal_eval)
    # Flatten the parsed dicts into one column per metric / config key.
    df_metrics_exp = pd.json_normalize(df['metrics'])
    df_config_exp = pd.json_normalize(df['generation_config'])
    df = pd.concat(
        [df.drop(['metrics', 'generation_config'], axis=1), df_metrics_exp, df_config_exp],
        axis=1,
    )
    # Transpose so each run becomes a column, which makes runs easy to
    # compare side by side.
    if as_dict:
        return df.T.to_dict(orient='records')
    return df.T
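

# A hypothetical usage sketch, not part of the original file: the class name
# `EvalsPlaybook` and its constructor below are assumptions, since this
# excerpt only shows the method itself.

playbook = EvalsPlaybook()  # assumed class name and constructor

# Compare two runs; the result is a transposed DataFrame with one column per
# run and one row per attribute / metric.
comparison = playbook.compare_eval_runs(["run-001", "run-002"])
print(comparison)

# A single run id (plain string) and dict output are also supported.
records = playbook.compare_eval_runs("run-001", as_dict=True)

# Because the run ids are interpolated directly into the SQL string above, a
# parameterized query is a safer variant. A minimal sketch using the
# google-cloud-bigquery query-parameter API follows; the table name
# `eval_runs` and the surrounding wiring are assumptions for illustration.

def fetch_runs_parameterized(client, table_prefix, run_ids):
    # Same lookup with query parameters instead of string interpolation.
    sql = f"""
        SELECT *
        FROM `{table_prefix}.eval_runs` runs
        WHERE runs.run_id IN UNNEST(@run_ids)
        ORDER BY runs.create_datetime DESC
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ArrayQueryParameter("run_ids", "STRING", run_ids),
        ]
    )
    return client.query_and_wait(sql, job_config=job_config).to_dataframe()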