def precision_recall_fscore()

in evals/elsuite/bugged_tools/utils.py


from typing import Sequence


def precision_recall_fscore(metrics: Sequence[dict]):
    """
    Calculates prediction metrics, where positive class is a tool being bugged. Handles edge cases
    where solver never predicted a certain class
    """

    def tool_is_buggy(metric):
        return len(metric["bugs"]) > 0

    # Calculate tp, fp, tn, fn
    tp = len([i for i in metrics if i["solver_predicted_bug"] and tool_is_buggy(i)])
    fn = len([i for i in metrics if not i["solver_predicted_bug"] and tool_is_buggy(i)])

    fp = len([i for i in metrics if i["solver_predicted_bug"] and not tool_is_buggy(i)])
    tn = len([i for i in metrics if not i["solver_predicted_bug"] and not tool_is_buggy(i)])

    # Calculate accuracy
    accuracy = calculate_accuracy(tp, fp, tn, fn)

    # If the solver never predicts the positive class, map each of the following to 0 rather than NaN
    precision = calculate_precision(tp, fp)
    recall = calculate_recall(tp, fn)
    f1 = calculate_f1(precision, recall)

    return tp, fp, tn, fn, accuracy, precision, recall, f1
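
The calculate_accuracy, calculate_precision, calculate_recall, and calculate_f1 helpers live elsewhere in evals/elsuite/bugged_tools/utils.py and are not shown here. The following is a minimal sketch of what they presumably do, assuming the standard confusion-matrix formulas plus the zero-instead-of-NaN convention noted in the comment above; the actual helpers may differ in detail.

def calculate_accuracy(tp: int, fp: int, tn: int, fn: int) -> float:
    # Fraction of all examples classified correctly. (Sketch, not the actual implementation.)
    total = tp + fp + tn + fn
    return (tp + tn) / total if total > 0 else 0.0


def calculate_precision(tp: int, fp: int) -> float:
    # Of all tools the solver flagged as bugged, the fraction that really were bugged.
    # Returns 0 rather than NaN when the solver never predicts the positive class.
    return tp / (tp + fp) if (tp + fp) > 0 else 0.0


def calculate_recall(tp: int, fn: int) -> float:
    # Of all truly bugged tools, the fraction the solver flagged.
    # Returns 0 rather than NaN when no bugged tools appear in the sample.
    return tp / (tp + fn) if (tp + fn) > 0 else 0.0


def calculate_f1(precision: float, recall: float) -> float:
    # Harmonic mean of precision and recall; 0 when both are 0.
    denom = precision + recall
    return 2 * precision * recall / denom if denom > 0 else 0.0

For illustration, a hypothetical metrics input with one example per confusion-matrix cell (the bug names are invented; only whether metric["bugs"] is non-empty matters):

metrics = [
    {"solver_predicted_bug": True, "bugs": ["off_by_one"]},     # true positive
    {"solver_predicted_bug": False, "bugs": []},                # true negative
    {"solver_predicted_bug": True, "bugs": []},                 # false positive
    {"solver_predicted_bug": False, "bugs": ["wrong_output"]},  # false negative
]
tp, fp, tn, fn, accuracy, precision, recall, f1 = precision_recall_fscore(metrics)
# With the sketched helpers: tp = fp = tn = fn = 1, so accuracy = precision = recall = f1 = 0.5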