# _create_examples() — excerpt from utils_ranking.py

    def _create_examples(self):
        """Yield one ``InputFeatures`` per question, packing all candidates.

        Reads JSONL records from ``self.filepath``. For each record, every
        candidate passage is encoded against the question (``text_a`` is the
        question, ``text_b`` is ``article_title + '[title]' + text``) and the
        per-candidate token sequences are concatenated, so each yielded
        feature has length ``self.max_length * self.num_cand``.

        Data convention (asserted below): the gold candidate is always first
        in ``data['candidates']`` (label 1), all others are negatives
        (label 0). The yielded ``label=0`` therefore encodes "the correct
        candidate is at index 0" for the ranking objective.

        Yields:
            InputFeatures: concatenated input_ids / token_type_ids /
            attention_mask for all candidates of one question, label 0.

        Raises:
            AssertionError: if candidate labels violate the gold-first
                convention, or the concatenated length is wrong (e.g. a
                record has a number of candidates != ``self.num_cand``).
        """
        with jsonlines.open(self.filepath, 'r') as reader:

            for idx_i, data in enumerate(reader.iter()):

                input_ids, token_type_ids, attention_mask = [], [], []

                for idx_j, line in enumerate(data['candidates']):

                    text_a = data['question']
                    # '[title]' is a literal separator token between the
                    # article title and the passage body.
                    text_b = line['article_title'] + '[title]' + line['text']
                    # 'judge' is either a dict (take its binary
                    # 'judge_contain_some' field) or already a bare label.
                    judge = line['judge']
                    label = judge['judge_contain_some'] if isinstance(judge, dict) else judge
                    # Gold-first convention: candidate 0 is the positive.
                    if idx_j == 0:
                        assert label == 1
                    else:
                        assert label == 0

                    inputs = self.tokenizer.encode_plus(
                        text_a,
                        text_b,
                        add_special_tokens=True,
                        max_length=self.max_length,
                        padding='max_length',
                        truncation='only_second')

                    # Flatten: candidates are concatenated, not stacked.
                    input_ids.extend(inputs["input_ids"])
                    token_type_ids.extend(inputs["token_type_ids"])
                    attention_mask.extend(inputs["attention_mask"])

                # BUG FIX: the failure message previously referenced bare
                # `max_length`/`num_cand`, which would raise NameError
                # instead of the intended AssertionError message.
                assert len(input_ids) == self.max_length * self.num_cand, \
                    f"actual length {len(input_ids)}; required {self.max_length * self.num_cand}"
                assert len(token_type_ids) == self.max_length * self.num_cand
                assert len(attention_mask) == self.max_length * self.num_cand

                yield InputFeatures(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        label=0
                        )