def get_all_metrics()

in coreference_metrics.py


    def get_all_metrics(
        self,
        labels: List[List[List[Tuple[int, int]]]],
        preds: List[List[List[Tuple[int, int]]]],
    ) -> Dict[str, Dict[str, Dict[str, float]]]:
        """
        Compute all metrics for coreference resolution.

        The inputs are two parallel lists of mention clusters (gold and predicted), for example:
        [   # this is the corpus level, with a list of documents
            [   # this is the document level, with a list of mention clusters
                [   # this is the cluster level, with a list of spans
                    (5, 7),
                    (11, 19),
                    ...
                ],
                ...
            ],
            ...
        ]

        The returned dict has 'micro' and 'macro' entries, each mapping metric
        names to a dict of scores.
        """
        assert len(labels) == len(preds)
        result = {}

        # compute micro-averaged scores (treat all clusters from all docs as a single list of clusters)
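        # each span is prefixed with its document index, e.g. (5, 7) in document 0 becomes (0, 5, 7),
        # so that identical spans from different documents remain distinct once pooled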
        gold_clusters = [
            [(i,) + span for span in cluster] for i, clusters in enumerate(labels) for cluster in clusters
        ]
        predicted_clusters = [
            [(i,) + span for span in cluster] for i, clusters in enumerate(preds) for cluster in clusters
        ]

        result['micro'] = self._compute_coref_metrics(gold_clusters, predicted_clusters)

        # compute macro-averaged scores (compute p/r/f1 for each doc first, then average across docs)
        doc_metrics = []
        for doc_gold, doc_pred in zip(labels, preds):
            doc_metrics.append(self._compute_coref_metrics(doc_gold, doc_pred))
        result['macro'] = self._average_nested_dict(doc_metrics)

        return result
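
To make the input format and the micro-averaging step concrete, below is a small, self-contained sketch. The documents and spans are made up for illustration, and the closing comment about the return value only assumes the 'micro'/'macro' keys set in the code above.

from typing import List, Tuple

# two documents; each document is a list of clusters; each cluster is a list of (start, end) spans
labels: List[List[List[Tuple[int, int]]]] = [
    [[(5, 7), (11, 19)], [(0, 2)]],     # document 0: two gold clusters
    [[(3, 4), (8, 10), (15, 17)]],      # document 1: one gold cluster
]

# the micro-averaging step pools clusters across documents after prefixing each span
# with its document index, so spans from different documents stay distinct
gold_clusters = [
    [(i,) + span for span in cluster] for i, clusters in enumerate(labels) for cluster in clusters
]
print(gold_clusters)
# [[(0, 5, 7), (0, 11, 19)], [(0, 0, 2)], [(1, 3, 4), (1, 8, 10), (1, 15, 17)]]

# get_all_metrics(labels, preds) then returns {'micro': {...}, 'macro': {...}},
# where each inner dict maps metric names to their scores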