in pytext/metric_reporters/squad_metric_reporter.py [0:0]
def calculate_metric(self):
    """Compute SQuAD evaluation metrics over all accumulated predictions.

    Rows sharing the same row index (the same example split across several
    document chunks) are deduplicated by keeping the chunk whose combined
    start+end position score is highest; rows are then re-sorted by row
    index before scoring.

    Returns:
        SquadMetrics with exact-match %, F1 %, example count, and (when
        ``ignore_impossible`` is False) has-answer classification metrics.
    """
    # Positional layout of each row tuple (indices referenced below):
    #  0 row_index, 1 answers, 2 question, 3 doc,
    #  4 pred_answer, 5 start_pos_pred, 6 end_pos_pred, 7 has_answer_pred,
    #  8 start_pos_target, 9 end_pos_target, 10 has_answer_target,
    # 11 start_pos_score, 12 end_pos_score, 13 has_answer_score
    all_rows = zip(
        self.all_context[self.ROW_INDEX],
        self.all_context[self.ANSWERS_COLUMN],
        self.all_context[self.QUES_COLUMN],
        self.all_context[self.DOC_COLUMN],
        self.all_pred_answers,
        self.all_start_pos_preds,
        self.all_end_pos_preds,
        self.all_has_answer_preds,
        self.all_start_pos_targets,
        self.all_end_pos_targets,
        self.all_has_answer_targets,
        self.all_start_pos_scores,
        self.all_end_pos_scores,
        self.all_has_answer_scores,
    )
    # Group rows by example (row index); an example may appear multiple
    # times when its document was split into several chunks.
    all_rows_dict = {}
    for row in all_rows:
        all_rows_dict.setdefault(row[0], []).append(row)
    # Keep, per example, the chunk with the highest start+end score sum.
    all_rows = []
    for rows in all_rows_dict.values():
        argmax = np.argmax([row[11] + row[12] for row in rows])
        all_rows.append(rows[argmax])
    # Fix: the original called sorted() and discarded its result (sorted()
    # returns a new list; it does not mutate its argument), so rows were
    # never actually reordered. Sort in place by row index instead.
    all_rows.sort(key=lambda x: int(x[0]))
    # Unpack the deduplicated, ordered rows back into the per-field lists
    # so downstream consumers see one entry per example.
    (
        self.all_context[self.ROW_INDEX],
        self.all_context[self.ANSWERS_COLUMN],
        self.all_context[self.QUES_COLUMN],
        self.all_context[self.DOC_COLUMN],
        self.all_pred_answers,
        self.all_start_pos_preds,
        self.all_end_pos_preds,
        self.all_has_answer_preds,
        self.all_start_pos_targets,
        self.all_end_pos_targets,
        self.all_has_answer_targets,
        self.all_start_pos_scores,
        self.all_end_pos_scores,
        self.all_has_answer_scores,
    ) = zip(*all_rows)
    exact_matches = self._compute_exact_matches(
        self.all_pred_answers,
        self.all_context[self.ANSWERS_COLUMN],
        self.all_has_answer_preds,
        self.all_has_answer_targets,
    )
    f1_score = self._compute_f1_score(
        self.all_pred_answers,
        self.all_context[self.ANSWERS_COLUMN],
        self.all_has_answer_preds,
        self.all_has_answer_targets,
    )
    # NOTE(review): count is assumed non-zero here (zip(*[]) above would
    # already have raised for an empty batch) — confirm callers never
    # invoke this with no accumulated examples.
    count = len(self.all_has_answer_preds)
    self.all_preds = (
        self.all_pred_answers,
        self.all_start_pos_preds,
        self.all_end_pos_preds,
        self.all_has_answer_preds,
    )
    self.all_targets = (
        self.all_context[self.ANSWERS_COLUMN],
        self.all_start_pos_targets,
        self.all_end_pos_targets,
        self.all_has_answer_targets,
    )
    self.all_scores = (
        self.all_start_pos_scores,
        self.all_end_pos_scores,
        self.all_has_answer_scores,
    )
    # Classification metrics (has-answer vs. no-answer) only make sense
    # when impossible questions are not being ignored.
    label_predictions = None
    if not self.ignore_impossible:
        label_predictions = [
            LabelPrediction(scores, pred, expect)
            for scores, pred, expect in zip_longest(
                self.all_has_answer_scores,
                self.all_has_answer_preds,
                self.all_has_answer_targets,
                fillvalue=[],
            )
        ]
    metrics = SquadMetrics(
        exact_matches=100.0 * exact_matches / count,
        f1_score=100.0 * f1_score / count,
        num_examples=count,
        classification_metrics=compute_classification_metrics(
            label_predictions,
            self.has_answer_labels,
            self.calculate_loss(),
        )
        if label_predictions
        else None,
    )
    return metrics