in evals/elsuite/bugged_tools/utils.py
from typing import Sequence


def precision_recall_fscore(metrics: Sequence[dict]):
    """
    Calculates prediction metrics, where the positive class is a tool being bugged. Handles edge
    cases where the solver never predicted a certain class.
    """

    def tool_is_buggy(metric):
        # A tool counts as buggy if at least one bug was injected into it
        return len(metric["bugs"]) > 0
    # Calculate tp, fp, tn, fn (positive class = tool is bugged)
    tp = len([i for i in metrics if i["solver_predicted_bug"] and tool_is_buggy(i)])
    fn = len([i for i in metrics if not i["solver_predicted_bug"] and tool_is_buggy(i)])
    fp = len([i for i in metrics if i["solver_predicted_bug"] and not tool_is_buggy(i)])
    tn = len([i for i in metrics if not i["solver_predicted_bug"] and not tool_is_buggy(i)])

    accuracy = calculate_accuracy(tp, fp, tn, fn)

    # If the solver never predicts the positive class, map each of the following to 0, not nan
    precision = calculate_precision(tp, fp)
    recall = calculate_recall(tp, fn)
    f1 = calculate_f1(precision, recall)

    return tp, fp, tn, fn, accuracy, precision, recall, f1
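

# ---------------------------------------------------------------------------
# NOTE: the calculate_* helpers called above are defined elsewhere in
# utils.py and are not shown in this excerpt. The sketch below is a
# hypothetical reconstruction, with signatures inferred from the call sites,
# of the zero-division handling the comment above describes (map an empty
# denominator to 0, not nan); it is not the repository's implementation.
# ---------------------------------------------------------------------------
def calculate_accuracy(tp: int, fp: int, tn: int, fn: int) -> float:
    # Fraction of correct predictions; 0 if there are no samples at all
    total = tp + fp + tn + fn
    return (tp + tn) / total if total > 0 else 0.0


def calculate_precision(tp: int, fp: int) -> float:
    # Precision is undefined when the solver never predicts positive; use 0
    return tp / (tp + fp) if (tp + fp) > 0 else 0.0


def calculate_recall(tp: int, fn: int) -> float:
    # Recall is undefined when no tool in the sample is actually bugged; use 0
    return tp / (tp + fn) if (tp + fn) > 0 else 0.0


def calculate_f1(precision: float, recall: float) -> float:
    # Harmonic mean of precision and recall; 0 when both are 0
    denom = precision + recall
    return (2 * precision * recall / denom) if denom > 0 else 0.0


# Example usage (hypothetical metric dicts with the fields read above):
# metrics = [
#     {"bugs": ["off_by_one"], "solver_predicted_bug": True},   # tp
#     {"bugs": [], "solver_predicted_bug": False},              # tn
# ]
# tp, fp, tn, fn, accuracy, precision, recall, f1 = precision_recall_fscore(metrics)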