def performance_loss_per_task()

in evals/elsuite/error_recovery/scripts/make_plots.py [0:0]


def performance_loss_per_task(metrics_df: pd.DataFrame, results_df: pd.DataFrame, out_dir: Path):
    """Plot the performance loss of every model on every task as a grouped bar chart.

    For each (model, task) pair, the ``CR_correct`` and ``IR_correct`` values are
    selected from ``metrics_df`` and passed to
    ``corrects_to_performance_loss_and_error``. The returned loss becomes the bar
    height and the returned error becomes the error bar. The figure is saved as
    ``results_split_by_model.png`` inside ``out_dir`` and then handed to
    ``maybe_show``.

    Args:
        metrics_df: Frame with ``solver``, ``task``, ``CR_correct`` and
            ``IR_correct`` columns.
        results_df: Frame from which the models and tasks to plot are derived
            (via ``get_unique_models`` and ``get_all_tasks_renamed``).
        out_dir: Directory the PNG is written to.
    """
    unique_models = get_unique_models(results_df)
    # NOTE(review): the return value is unused here; the call is kept only in case
    # get_all_tasks has side effects -- confirm and drop it if it is pure.
    get_all_tasks(results_df)
    all_tasks_renamed = get_all_tasks_renamed(results_df)
    all_tasks_pretty = [TASK_NAMES[i] for i in all_tasks_renamed]

    all_metrics = {}
    all_errors = {}
    for model in unique_models:
        # The model filter does not depend on the task, so apply it once per model
        # instead of rebuilding the mask for every task.
        model_rows = metrics_df[metrics_df.solver == model]

        metrics = []
        errors = []
        for task in all_tasks_renamed:
            # Select the (model, task) rows once and read both columns from them.
            task_rows = model_rows[model_rows.task == task]
            performance_loss, performance_loss_error = corrects_to_performance_loss_and_error(
                task_rows["CR_correct"], task_rows["IR_correct"]
            )
            metrics.append(performance_loss)
            errors.append(performance_loss_error)

        pretty_model_name = MODEL_NAMES[model]
        all_metrics[pretty_model_name] = metrics
        all_errors[pretty_model_name] = errors

    # One row per task, one column per model: pandas draws one bar group per row.
    plot_df = pd.DataFrame(all_metrics, index=all_tasks_pretty)
    errs_df = pd.DataFrame(all_errors, index=all_tasks_pretty)
    # Colours are listed in the same model order as the DataFrame columns.
    colors = [MODEL_COLOR_MAP[model] for model in unique_models]

    fig, ax = plt.subplots(figsize=(20, 6), constrained_layout=True)
    ax = plot_df.plot.bar(rot=0.0, color=colors, ax=ax, width=0.8, yerr=errs_df, capsize=4)
    annotate_axes(ax, errs_df)
    # Shrink current axis by 20% to make room for the legend
    box = ax.get_position()
    ax.set_position((box.x0, box.y0, box.width * 0.8, box.height))
    ax.set_ylim(bottom=-1, top=1.1)
    ax.legend()
    # Horizontal reference line at zero loss spanning the full axis width.
    ax.axhline(0, 0, 1, color="black", linestyle="-")
    ax.set_title("Performance loss per task (lower is better)")
    ax.set_xlabel("Task type")
    ax.set_ylabel("Performance loss")

    outpath = os.path.join(out_dir, "results_split_by_model.png")
    fig.savefig(outpath)
    maybe_show(fig)