in datasets.py
def compute_features(self, max_input_length: int, max_output_length: int, multitask: bool = False):
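"""Convert the dataset's examples into model-ready InputFeatures: format each
example as input/output text, truncate to the token budgets, tokenize with
fixed-length padding, and pair input token ids and attention masks with label ids."""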
input_sentences = [self.input_format.format_input(example, multitask=multitask) for example in self.examples]
output_sentences = [self.output_format.format_output(example) for example in self.examples]
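# Keep only the first max_input_length / max_output_length tokens of each sentence.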
input_sentences = self.truncate_first_n_tokens(examples=input_sentences,
max_seq_length=max_input_length)
output_sentences = self.truncate_first_n_tokens(examples=output_sentences,
max_seq_length=max_output_length)
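# Encode both sides as PyTorch tensors, padded (and, if needed, truncated) to a fixed length.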
input_tok = self.tokenizer.batch_encode_plus(
input_sentences,
max_length=max_input_length,
return_tensors='pt',
padding='max_length',
truncation=True,
)
output_tok = self.tokenizer.batch_encode_plus(
output_sentences,
max_length=max_output_length,
return_tensors='pt',
padding='max_length',
truncation=True,
)
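# Warn if any formatted sentence exceeds the configured maximum sequence length.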
self._warn_max_sequence_length(max_input_length, input_sentences, "input")
self._warn_max_sequence_length(max_output_length, output_sentences, "output")
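# Sanity check: every input sequence must have a matching output sequence.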
assert input_tok.input_ids.size(0) == output_tok.input_ids.size(0), (
    f'Size does not match: len(input_tok.input_ids)={len(input_tok.input_ids)}, '
    f'len(output_tok.input_ids)={len(output_tok.input_ids)}'
)
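# Pack the aligned token ids, attention masks, and label ids into InputFeatures.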
features = []
for sentence_input_ids, att_mask, label_input_ids in zip(
        input_tok.input_ids, input_tok.attention_mask, output_tok.input_ids):
features.append(InputFeatures(
input_ids=sentence_input_ids.tolist(),
attention_mask=att_mask.tolist(),
label_ids=label_input_ids.tolist()
))
return features
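# Minimal usage sketch (the dataset class, length limits, and tensor shapes below
# are assumptions for illustration, not part of this file):
#
#   dataset = SomeDataset(...)  # hypothetical subclass that sets examples, formats, and tokenizer
#   features = dataset.compute_features(max_input_length=256, max_output_length=128)
#   input_ids = torch.tensor([f.input_ids for f in features])            # (num_examples, 256)
#   attention_mask = torch.tensor([f.attention_mask for f in features])  # (num_examples, 256)
#   labels = torch.tensor([f.label_ids for f in features])               # (num_examples, 128)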