def compare_eval_runs()

in genai-on-vertex-ai/gemini/evals_playbook/utils/evals_playbook.py [0:0]


    def compare_eval_runs(self, experiment_run_ids, as_dict=False):
        """Fetch evaluation runs from BigQuery and lay them out for comparison.

        Each requested run is joined with its experiment and (when present)
        the experiment's prompt. The ``metrics`` and ``generation_config``
        columns are parsed and flattened into one column per key, and the
        frame is transposed so that every run becomes a column and every
        field becomes a row.

        Args:
            experiment_run_ids: A single run id, or an iterable of run ids
                (list, tuple, set, ...). Non-string items are converted with
                ``str()``.
            as_dict: When true, return ``to_dict(orient='records')`` of the
                transposed frame (one dict per field row, keyed by the run's
                positional column label) instead of the DataFrame itself.

        Returns:
            The transposed ``pandas.DataFrame``, or a list of dicts when
            ``as_dict`` is true. Runs are ordered by ``create_datetime``,
            newest first.

        Raises:
            ValueError: If ``experiment_run_ids`` is empty or ``None``.
        """
        if not experiment_run_ids:
            raise ValueError("experiment_run_ids are required to compare runs")

        # Accept a bare string or any iterable of ids; a bare string must not
        # be iterated character by character.
        if isinstance(experiment_run_ids, str):
            run_ids = [experiment_run_ids]
        else:
            run_ids = [str(run_id) for run_id in experiment_run_ids]

        table_prefix = f"{cfg.PROJECT_ID}.{cfg.BQ_DATASET_ID}"
        runs_table = f"{table_prefix}.{BQ_TABLE_MAP.get('runs').get('table_name')}"
        experiments_table = (
            f"{table_prefix}.{BQ_TABLE_MAP.get('experiments').get('table_name')}"
        )
        prompts_table = (
            f"{table_prefix}.{BQ_TABLE_MAP.get('prompts').get('table_name')}"
        )
        client = bigquery.Client(project=cfg.PROJECT_ID)

        # Run ids are bound as a query parameter rather than spliced into the
        # SQL text, so quotes or other special characters in an id can neither
        # break the statement nor inject SQL. Table names cannot be
        # parameterised and come from project configuration.
        sql = f"""
        SELECT
            runs.task_id,
            runs.run_id,
            runs.experiment_id,
            exp.experiment_desc,
            exp.model_endpoint,
            exp.model_name,
            exp.generation_config,
            prompt.prompt_template,
            prompt.system_instruction,
            runs.metrics,
            runs.create_datetime
        FROM
            `{runs_table}` runs
        JOIN
            `{experiments_table}` exp
        ON
            runs.experiment_id = exp.experiment_id
        LEFT JOIN
            `{prompts_table}` prompt
        ON
            exp.prompt_id = prompt.prompt_id
        WHERE runs.run_id IN UNNEST(@run_ids)
        ORDER BY runs.create_datetime DESC
        """
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ArrayQueryParameter("run_ids", "STRING", run_ids),
            ]
        )

        df = client.query_and_wait(sql, job_config=job_config).to_dataframe()

        # NOTE(review): eval() executes whatever text is stored in these
        # columns. It is kept because the stored format cannot be confirmed
        # from here (Python repr vs. JSON); once confirmed, switch to
        # ast.literal_eval or json.loads.
        df['metrics'] = df['metrics'].apply(eval)
        df['generation_config'] = df['generation_config'].apply(eval)

        # Expand the parsed dicts into one column per (possibly nested) key.
        df_metrics_exp = pd.json_normalize(df['metrics'])
        df_config_exp = pd.json_normalize(df['generation_config'])

        df = pd.concat(
            [
                df.drop(['metrics', 'generation_config'], axis=1),
                df_metrics_exp,
                df_config_exp,
            ],
            axis=1,
        )

        # Transpose so each run is a column and each field is a row.
        comparison = df.T
        if as_dict:
            return comparison.to_dict(orient='records')
        return comparison