in evalbench/reporting/analyzer.py
import pandas as pd


def analyze_result(scores, experiment_config: dict[str, str]):
    """Analyze accuracy results and return the scores and per-metric summary dataframes."""
    summary_scores = []
    df = pd.DataFrame.from_dict(scores)
    scorers = experiment_config["scorers"]
    num_scorers = len(scorers)

    # Summarize each configured scorer.
    for metric_name in scorers:
        metric_name = metric_name.strip()
        metric_score = 100
        summary = analyze_one_metric(
            df=df,
            metric_name=metric_name,
            metric_score=metric_score,
            num_scorers=num_scorers,
        )
        summary_scores.append(summary)

    # Executability is summarized separately as an execution metric.
    summary = analyze_one_metric(
        df=df,
        metric_name="executable",
        metric_score=1,
        execution=True,
        num_scorers=num_scorers,
    )
    summary_scores.append(summary)
    summary_scores_df = pd.DataFrame.from_dict(summary_scores)

    # Normalize identifier and error columns to pandas' string dtype.
    string_columns = [
        "generated_error",
        "comparator",
        "comparison_error",
        "generated_sql",
        "job_id",
        "id",
    ]
    df[string_columns] = df[string_columns].astype("string")
    return df, summary_scores_df
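
A minimal usage sketch, with assumed inputs: the column names, the scorer name "exact_match", and the list-valued "scorers" entry are illustrative guesses at the expected schema, and `analyze_one_metric` is the module's own helper (not shown above):

    scores = {
        "id": ["q1"],
        "job_id": ["job-001"],
        "generated_sql": ["SELECT 1"],
        "generated_error": [None],
        "comparator": ["exact"],
        "comparison_error": [None],
        "exact_match": [100],
        "executable": [1],
    }
    experiment_config = {"scorers": ["exact_match"]}
    df, summary_scores_df = analyze_result(scores, experiment_config)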