def _compute_agg_metrics()

in evals/elsuite/identifying_variables/eval.py


    def _compute_agg_metrics(self, metrics: List[Dict]) -> Dict[str, float]:
        """
        Computes aggregate metrics across all samples
        """
        main_metrics = {
            "hyp_valid_acc": np.mean([x["hyp_valid_correct"] for x in metrics]),
            "violation_count": np.sum([x["violation"] for x in metrics]),
            "violation_rate": np.mean([x["violation"] for x in metrics]),
            # These metrics may be NaN for samples where the target hypothesis
            # is invalid, so use nanmean to skip them
            "ctrl_nDCG": np.nanmean([x["ctrl_nDCG"] for x in metrics]),
            "ctrl_recall": np.nanmean([x["ctrl_recall"] for x in metrics]),
            "ctrl_fallout": np.nanmean([x["ctrl_fallout"] for x in metrics]),
            "ind_acc": np.nanmean([x["ind_correct"] for x in metrics]),
            "dep_acc": np.nanmean([x["dep_correct"] for x in metrics]),
            "n_valid_hyp": np.sum([x["valid_hyp"] for x in metrics]),
        }
        if self.group_metrics:
            grouped_metrics = self._compute_grouped_metrics(metrics)
        else:
            grouped_metrics = {}

        total_metrics = {**main_metrics, **grouped_metrics}
        total_metrics = {k: float(v) for k, v in total_metrics.items()}
        return total_metrics
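
For illustration, a minimal sketch of how the nanmean-based aggregation treats samples whose target hypothesis is invalid. The metric keys mirror those above; the two per-sample dicts are made up for the example and are not taken from the eval itself.

import numpy as np

# Two hypothetical per-sample metric dicts: the second has an invalid target
# hypothesis, so its control-variable and variable-identification metrics are
# NaN and should not drag down the aggregate means.
metrics = [
    {"hyp_valid_correct": 1, "violation": 0, "ctrl_nDCG": 0.8,
     "ctrl_recall": 1.0, "ctrl_fallout": 0.0, "ind_correct": 1,
     "dep_correct": 1, "valid_hyp": 1},
    {"hyp_valid_correct": 1, "violation": 0, "ctrl_nDCG": np.nan,
     "ctrl_recall": np.nan, "ctrl_fallout": np.nan, "ind_correct": np.nan,
     "dep_correct": np.nan, "valid_hyp": 0},
]

# np.nanmean ignores the NaN entries, so only the valid-hypothesis sample
# contributes to ctrl_nDCG; np.mean over "violation" still counts every sample.
print(np.nanmean([x["ctrl_nDCG"] for x in metrics]))   # 0.8
print(np.mean([x["violation"] for x in metrics]))      # 0.0
print(int(np.sum([x["valid_hyp"] for x in metrics])))  # 1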