pytext/data/squad_for_bert_tensorizer.py [211:276]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def numberize(self, row):
        self.total += 1
        numberized_row_tuple = super().numberize(row)
        try:
            # Append the teacher's start/end token logits and has-answer
            # logits to the tuple produced by the base tensorizer.
            tup = numberized_row_tuple + (
                self._get_token_logits(
                    row[self.start_logits_column], row[self.pad_mask_column]
                ),
                self._get_token_logits(
                    row[self.end_logits_column], row[self.pad_mask_column]
                ),
                row[self.has_answer_logits_column],
            )
        except KeyError:
            # Logits for the KD tensorizer were not provided for this row;
            # fall back to padding values of matching shape.
            tup = numberized_row_tuple + (
                [self.vocab.get_pad_index()] * len(numberized_row_tuple[0]),
                [self.vocab.get_pad_index()] * len(numberized_row_tuple[0]),
                [self.vocab.get_pad_index()] * 2,
            )

        try:
            # The token-level start logits (tup[6]) must align one-to-one
            # with the token ids (tup[0]).
            assert len(tup[0]) == len(tup[6])
        except AssertionError:
            self.mismatches += 1
            print(
                f"len(tup[0]) = {len(tup[0])} and len(tup[6]) = {len(tup[6])}",
                flush=True,
            )
            raise
        return tup

    def tensorize(self, batch):
        (
            tokens,
            segment_labels,
            seq_lens,
            positions,
            answer_start_idx,
            answer_end_idx,
            start_logits,
            end_logits,
            has_answer_logits,
        ) = zip(*batch)

        # Tensorize the standard QA fields via the base class, then append
        # the padded teacher-logit tensors.
        tensor_tuple = super().tensorize(
            zip(
                tokens,
                segment_labels,
                seq_lens,
                positions,
                answer_start_idx,
                answer_end_idx,
            )
        )
        return tensor_tuple + (
            pad_and_tensorize(start_logits, dtype=torch.float),
            pad_and_tensorize(end_logits, dtype=torch.float),
            pad_and_tensorize(
                has_answer_logits,
                dtype=torch.float,
                pad_shape=[len(has_answer_logits), len(has_answer_logits[0])],
            ),
        )

    def _get_token_logits(self, logits, pad_mask):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
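
For context on the tensorize step above: pad_and_tensorize right-pads each row's logits to a common length before stacking them into one float tensor (with an explicit pad_shape for has_answer_logits so every row pads to the same (batch, num_labels) shape). The sketch below mirrors just that padding step in plain PyTorch; pad_float_batch is a hypothetical stand-in, not the PyText helper itself.

    import torch


    def pad_float_batch(rows, pad_value=0.0):
        # Hypothetical stand-in for the padding done by pad_and_tensorize
        # with dtype=torch.float: right-pad every row to the batch max
        # length, then stack into a single (batch, max_len) float tensor.
        max_len = max(len(r) for r in rows)
        padded = [list(r) + [pad_value] * (max_len - len(r)) for r in rows]
        return torch.tensor(padded, dtype=torch.float)


    # Example: start logits for three rows of different sequence lengths.
    start_logits = [[0.1, 2.3], [1.5, -0.2, 0.8], [0.4]]
    print(pad_float_batch(start_logits))
    # tensor([[ 0.1000,  2.3000,  0.0000],
    #         [ 1.5000, -0.2000,  0.8000],
    #         [ 0.4000,  0.0000,  0.0000]])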



pytext/data/squad_for_bert_tensorizer.py [391:455]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def numberize(self, row):
        self.total += 1
        numberized_row_tuple = super().numberize(row)
        try:
            # Append the teacher's start/end token logits and has-answer
            # logits to the tuple produced by the base tensorizer.
            tup = numberized_row_tuple + (
                self._get_token_logits(
                    row[self.start_logits_column], row[self.pad_mask_column]
                ),
                self._get_token_logits(
                    row[self.end_logits_column], row[self.pad_mask_column]
                ),
                row[self.has_answer_logits_column],
            )
        except KeyError:
            # Logits for the KD tensorizer were not provided for this row;
            # fall back to padding values of matching shape.
            tup = numberized_row_tuple + (
                [self.vocab.get_pad_index()] * len(numberized_row_tuple[0]),
                [self.vocab.get_pad_index()] * len(numberized_row_tuple[0]),
                [self.vocab.get_pad_index()] * 2,
            )

        try:
            # The token-level start logits (tup[6]) must align one-to-one
            # with the token ids (tup[0]).
            assert len(tup[0]) == len(tup[6])
        except AssertionError:
            self.mismatches += 1
            print(
                f"len(tup[0]) = {len(tup[0])} and len(tup[6]) = {len(tup[6])}",
                flush=True,
            )
            raise
        return tup

    def tensorize(self, batch):
        (
            tokens,
            segment_labels,
            seq_lens,
            positions,
            answer_start_idx,
            answer_end_idx,
            start_logits,
            end_logits,
            has_answer_logits,
        ) = zip(*batch)

        # Tensorize the standard QA fields via the base class, then append
        # the padded teacher-logit tensors.
        tensor_tuple = super().tensorize(
            zip(
                tokens,
                segment_labels,
                seq_lens,
                positions,
                answer_start_idx,
                answer_end_idx,
            )
        )
        return tensor_tuple + (
            pad_and_tensorize(start_logits, dtype=torch.float),
            pad_and_tensorize(end_logits, dtype=torch.float),
            pad_and_tensorize(
                has_answer_logits,
                dtype=torch.float,
                pad_shape=[len(has_answer_logits), len(has_answer_logits[0])],
            ),
        )

    def _get_token_logits(self, logits, pad_mask):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
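
The two reported ranges are essentially identical, which is the usual cue that the KD-specific numberize/tensorize logic could live in a single mixin that both tensorizer classes inherit. A minimal, self-contained sketch of that pattern with toy stand-in classes (all names here are hypothetical, not PyText APIs):

    class KDLogitsMixin:
        """Hypothetical mixin holding one copy of the KD-specific row handling."""

        def numberize(self, row):
            # Delegate the shared fields to the base class via cooperative
            # super(), then append the KD extras exactly once.
            base = super().numberize(row)
            # Toy stand-in for the teacher-logit columns used above.
            return base + (row.get("start_logits", []),)


    class BaseTensorizer:
        def numberize(self, row):
            return (row["tokens"],)


    class TensorizerForKD(KDLogitsMixin, BaseTensorizer):
        pass


    row = {"tokens": [1, 2, 3], "start_logits": [0.1, 0.2, 0.3]}
    print(TensorizerForKD().numberize(row))
    # ([1, 2, 3], [0.1, 0.2, 0.3])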



