def get_eval_runs()

in genai-on-vertex-ai/gemini/evals_playbook/utils/evals_playbook.py [0:0]


    def get_eval_runs(self, experiment_id: str, experiment_run_id: str = "", task_id: str = "", as_dict: bool = False):
        """Return run-level metrics for an experiment, joined with the experiment metadata.

        When experiment_run_id is not given, the most recent runs (up to 5) are returned.
        """
        where_keys = {}
        if not experiment_run_id:
            print("[INFO] experiment_run_id not passed. Showing last 5 runs (if available).")
            limit_offset = 5
        else:
            where_keys["run_id"] = experiment_run_id
            limit_offset = 1
        if experiment_id:
            where_keys["experiment_id"] = experiment_id
            if task_id:
                where_keys["task_id"] = task_id

            # Fetch the experiment row and flatten its generation_config JSON into columns.
            exp_df = self.get_experiment(experiment_id=experiment_id)
            exp_df = exp_df[["experiment_id", "experiment_desc", "prompt_id", "model_endpoint", "model_name", "generation_config"]]
            model_config_df_exp = pd.json_normalize(exp_df['generation_config'])
            exp_df = pd.concat([exp_df.drop('generation_config', axis=1), model_config_df_exp], axis=1)
            # Fetch run metrics, join them with the experiment metadata, and flatten the metrics JSON.
            metrics_df = self._get_one("runs", where_keys, limit_offset=limit_offset, as_dict=False)
            metrics_df = metrics_df[['experiment_id', 'run_id', 'metrics', 'task_id', 'create_datetime', 'update_datetime', 'tags']]
            metrics_df = pd.merge(exp_df, metrics_df, on='experiment_id', how='left')
            # Metrics are stored as the string representation of a dict; parse before normalizing.
            metrics_df['metrics'] = metrics_df['metrics'].apply(eval)
            metrics_df_exp = pd.json_normalize(metrics_df['metrics'])
            metrics_df = pd.concat([metrics_df.drop('metrics', axis=1), metrics_df_exp], axis=1)
            # Transpose so each run is returned as a column.
            if as_dict:
                return metrics_df.T.to_dict(orient='records')
            else:
                return metrics_df.T
        else:
            raise ValueError("experiment_id is required.")
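
A minimal usage sketch, assuming the enclosing class is exported as `Evals` from `utils/evals_playbook.py` and can be constructed without required arguments; the experiment, run, and task IDs below are illustrative placeholders, not values from the source.

# Hypothetical usage sketch; the class name, import path, and IDs are assumptions.
from utils.evals_playbook import Evals

evals = Evals()

# Latest runs (up to 5) for an experiment, returned as a transposed DataFrame
# with one column per run.
runs_df = evals.get_eval_runs(experiment_id="exp-001")
print(runs_df)

# A single named run, returned as a list of dicts instead of a DataFrame.
run_records = evals.get_eval_runs(
    experiment_id="exp-001",
    experiment_run_id="run-001",
    task_id="task-001",
    as_dict=True,
)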