in evalbench/evalbench.py [0:0]
def main(argv: Sequence[str]):
    try:
        logging.info("EvalBench v1.0.0")
        session: dict = {}
        parsed_config = load_yaml_config(_EXPERIMENT_CONFIG.value)
        if not parsed_config:
            logging.error("No Eval Config Found.")
            return
        set_session_configs(session, parsed_config)
        # Load the configs.
        config, db_configs, model_config, setup_config = load_session_configs(session)
        logging.info("Loaded Configurations in %s", _EXPERIMENT_CONFIG.value)
        # Load the dataset.
        dataset = load_dataset_from_json(session["dataset_config"], config)
        # Load the evaluator.
        evaluator = Orchestrator(config, db_configs, setup_config, report_progress=True)
        # Run the evaluations.
        evaluator.evaluate(flatten_dataset(dataset))
        job_id, run_time, results_tf, scores_tf = evaluator.process()
        # Create DataFrames for reporting.
        reporters = get_reporters(parsed_config.get("reporting"), job_id, run_time)
        config_df = config_to_df(job_id, run_time, config, model_config, db_configs)
        results = load_json(results_tf)
        results_df = report.get_dataframe(results)
        report.quick_summary(results_df)
        scores = load_json(scores_tf)
        scores_df, summary_scores_df = analyzer.analyze_result(scores, config)
        summary_scores_df["job_id"] = job_id
        summary_scores_df["run_time"] = run_time
        # Store the reports in the configured outputs.
        for reporter in reporters:
            reporter.store(config_df, report.STORETYPE.CONFIGS)
            reporter.store(results_df, report.STORETYPE.EVALS)
            reporter.store(scores_df, report.STORETYPE.SCORES)
            reporter.store(summary_scores_df, report.STORETYPE.SUMMARY)
            reporter.print_dashboard_links()
        print(f"Finished Job ID {job_id}")
    except Exception as e:
        # Log the full traceback, not just the exception message.
        logging.exception(e)
    finally:
        if _IN_COLAB:
            # In a Colab runtime, raise SystemExit so the kernel stays alive.
            sys.exit(0)
        # Otherwise terminate the process immediately, skipping normal
        # interpreter shutdown (e.g. waiting on non-daemon threads).
        os._exit(0)
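
# A minimal sketch (not from the source file) of how a main() like this is
# typically wired up as the script entry point, assuming absl-style flags as
# suggested by the main(argv) signature and _EXPERIMENT_CONFIG.value. The flag
# name and help text below are hypothetical; the actual wiring may differ.
from absl import app, flags

_EXPERIMENT_CONFIG = flags.DEFINE_string(
    "experiment_config", None, "Path to the experiment YAML config."
)

if __name__ == "__main__":
    app.run(main)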