def analyze_result()

in evalbench/reporting/analyzer.py [0:0]


import pandas as pd


def analyze_result(scores, experiment_config: dict[str, str]):
    """Analyze accuracy results and build a per-metric summary."""
    summary_scores = []
    df = pd.DataFrame.from_dict(scores)
    # experiment_config values are strings (per the annotation), so split the
    # comma-separated scorer list into individual metric names.
    scorers = [name.strip() for name in experiment_config["scorers"].split(",")]
    num_scorers = len(scorers)
    for metric_name in scorers:
        # Scorer metrics are graded against a full score of 100.
        metric_score = 100
        summary = analyze_one_metric(
            df=df,
            metric_name=metric_name,
            metric_score=metric_score,
            num_scorers=num_scorers,
        )
        summary_scores.append(summary)

    # Separately summarize whether the generated SQL executed, graded against a
    # full score of 1.
    summary = analyze_one_metric(
        df=df,
        metric_name="executable",
        metric_score=1,
        execution=True,
        num_scorers=num_scorers,
    )

    summary_scores.append(summary)
    summary_scores_df = pd.DataFrame.from_dict(summary_scores)
    # Cast identifier and error/text columns to pandas' nullable string dtype
    # so downstream reporting sees a consistent column type.
    string_columns = [
        "generated_error",
        "comparator",
        "comparison_error",
        "generated_sql",
        "job_id",
        "id",
    ]
    df[string_columns] = df[string_columns].astype("string")
    return df, summary_scores_df
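
A minimal usage sketch, assuming the score records carry one field per scorer
plus an "executable" flag; the exact record schema expected by
analyze_one_metric, and the scorer name "exact_match", are illustrative
assumptions rather than values taken from evalbench.

from evalbench.reporting.analyzer import analyze_result

scores = [
    {
        "id": "q1",
        "job_id": "job-1",
        "generated_sql": "SELECT 1",
        "generated_error": None,
        "comparator": "exact",
        "comparison_error": None,
        "exact_match": 100,  # assumed per-scorer score field
        "executable": 1,  # assumed execution flag
    },
]
experiment_config = {"scorers": "exact_match"}

df, summary_scores_df = analyze_result(scores, experiment_config)
print(summary_scores_df)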