def _log_additional_metrics()

in evals/elsuite/bugged_tools/eval.py


    def _log_additional_metrics(self, metrics: Sequence[Event], results: dict):
        """
        Modifies `results` in-place, breaking the metrics down per tool and per bug type.
        """
        # Collect the unique tools and bug types seen across all samples
        all_tools = list({j for i in metrics for j in i["tools"]})
        all_bugs = list({j for i in metrics for j in i["bugs"]})

        # Log bug metrics per type of tool
        for tool in all_tools:
            filtered_metrics = [i for i in metrics if i["tools"][0] == tool]
            tp, fp, tn, fn, accuracy, precision, recall, f1 = precision_recall_fscore(
                filtered_metrics
            )

            results[f"tool_{tool}_f1"] = f1
            results[f"tool_{tool}_precision"] = precision
            results[f"tool_{tool}_recall"] = recall
            results[f"tool_{tool}_accuracy"] = accuracy
            results[f"tool_{tool}_tp"] = tp
            results[f"tool_{tool}_fp"] = fp
            results[f"tool_{tool}_tn"] = tn
            results[f"tool_{tool}_fn"] = fn

        # Log bug metrics per type of bug. Only log accuracy since all examples here are positive (bugged)
        for bug in all_bugs:
            # Keep only samples that actually contain this bug type
            filtered_metrics = [i for i in metrics if len(i["bugs"]) > 0 and i["bugs"][0] == bug]

            tp, fp, tn, fn, accuracy, precision, recall, f1 = precision_recall_fscore(
                filtered_metrics
            )
            results[f"bug_{bug}_accuracy"] = accuracy