def calculate_metric()

in pytext/metric_reporters/squad_metric_reporter.py
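
For context, the method depends on several module-level imports from the surrounding file. The import paths below are an assumption based on PyText's usual layout, not copied from the source:

    import numpy as np
    from itertools import zip_longest

    # assumed locations; verify against the file's actual header
    from pytext.metrics import LabelPrediction, compute_classification_metrics
    from pytext.metrics.squad_metrics import SquadMetrics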


    def calculate_metric(self):
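        # Collect one tuple per predicted row: context columns, predictions,
        # targets, and confidence scores, all aligned by position.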
        all_rows = zip(
            self.all_context[self.ROW_INDEX],
            self.all_context[self.ANSWERS_COLUMN],
            self.all_context[self.QUES_COLUMN],
            self.all_context[self.DOC_COLUMN],
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        )

        # Group candidate rows by row index: a question whose document is
        # split into multiple passages yields several rows with the same index.
        all_rows_dict = {}
        for row in all_rows:
            all_rows_dict.setdefault(row[0], []).append(row)

        # For each row index, keep only the candidate with the highest
        # combined start + end span score (tuple fields 11 and 12).
        all_rows = []
        for rows in all_rows_dict.values():
            argmax = np.argmax([row[11] + row[12] for row in rows])
            all_rows.append(rows[argmax])

        # Sort in place by row index; the original bare sorted() call
        # discarded its result.
        all_rows.sort(key=lambda x: int(x[0]))

        # Unzip the deduplicated rows back into parallel per-column lists.
        (
            self.all_context[self.ROW_INDEX],
            self.all_context[self.ANSWERS_COLUMN],
            self.all_context[self.QUES_COLUMN],
            self.all_context[self.DOC_COLUMN],
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        ) = zip(*all_rows)

        # Official SQuAD-style metrics over the deduplicated examples.
        exact_matches = self._compute_exact_matches(
            self.all_pred_answers,
            self.all_context[self.ANSWERS_COLUMN],
            self.all_has_answer_preds,
            self.all_has_answer_targets,
        )
        f1_score = self._compute_f1_score(
            self.all_pred_answers,
            self.all_context[self.ANSWERS_COLUMN],
            self.all_has_answer_preds,
            self.all_has_answer_targets,
        )
        count = len(self.all_has_answer_preds)

        # Cache parallel tuples of predictions, targets, and scores for
        # downstream reporting.
        self.all_preds = (
            self.all_pred_answers,
            self.all_start_pos_preds,
            self.all_end_pos_preds,
            self.all_has_answer_preds,
        )
        self.all_targets = (
            self.all_context[self.ANSWERS_COLUMN],
            self.all_start_pos_targets,
            self.all_end_pos_targets,
            self.all_has_answer_targets,
        )
        self.all_scores = (
            self.all_start_pos_scores,
            self.all_end_pos_scores,
            self.all_has_answer_scores,
        )
        # When impossible (no-answer) questions are scored, also compute
        # classification metrics over the has-answer predictions.
        label_predictions = None
        if not self.ignore_impossible:
            label_predictions = [
                LabelPrediction(scores, pred, expect)
                for scores, pred, expect in zip_longest(
                    self.all_has_answer_scores,
                    self.all_has_answer_preds,
                    self.all_has_answer_targets,
                    fillvalue=[],
                )
            ]

        # Exact match and F1 are scaled to percentages of the example count.
        metrics = SquadMetrics(
            exact_matches=100.0 * exact_matches / count,
            f1_score=100.0 * f1_score / count,
            num_examples=count,
            classification_metrics=compute_classification_metrics(
                label_predictions,
                self.has_answer_labels,
                self.calculate_loss(),
            )
            if label_predictions
            else None,
        )
        return metrics
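
The subtle step above is the per-question deduplication: when a long document is split into multiple passages, every passage produces a candidate row for the same question, and only the candidate with the highest combined start + end score survives. A minimal, self-contained sketch of that pattern on hypothetical toy rows:

    import numpy as np

    # hypothetical rows: (row_index, answer, start_score, end_score)
    rows = [
        (0, "answer A", 0.2, 0.3),
        (0, "answer B", 0.9, 0.8),  # best candidate for question 0
        (1, "answer C", 0.5, 0.4),
    ]

    # group candidates by row index
    by_index = {}
    for row in rows:
        by_index.setdefault(row[0], []).append(row)

    # keep the candidate with the highest combined span score per index
    best = []
    for candidates in by_index.values():
        argmax = np.argmax([r[-2] + r[-1] for r in candidates])
        best.append(candidates[argmax])

    best.sort(key=lambda r: int(r[0]))
    print(best)  # [(0, 'answer B', 0.9, 0.8), (1, 'answer C', 0.5, 0.4)]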