def process_one()

in src/utils_fusion_in_decoder.py [0:0]


    # tokenizer, max_source_length, logger, and InputFeatures are captured from the enclosing scope
    def process_one(examples):

        output = []

        for (ex_index, example) in enumerate(examples):

            # encode every source passage for this example in one batch
            inputs = tokenizer.batch_encode_plus(
                example.source, # source is a list of str
                max_length=max_source_length,
                add_special_tokens=True,
                padding='max_length',
                truncation='longest_first',
                # return_tensors="pt"
            )

            # encode the target; it is padded/truncated to max_source_length as well
            labels = tokenizer.encode(
                example.target,
                max_length=max_source_length,
                add_special_tokens=True,
                padding='max_length',
                truncation='longest_first',
                # return_tensors="pt"
            )

            # log the first few examples (guid numeric suffix < 10) for inspection
            if int(example.guid.split('-')[-1]) < 10:
                logger.info("*** Example ***")
                logger.info("guid: %s" % (example.guid))
                logger.info("input_ids: %s" % " ".join([str(x) for x in inputs["input_ids"][0]]))
                logger.info("attention_mask: %s" % " ".join([str(x) for x in inputs["attention_mask"][0]]))
                logger.info("input_tokens: %s" % tokenizer.decode(inputs["input_ids"][0]))
                logger.info("labels: %s" % tokenizer.decode(labels))

            # every encoded passage must be padded/truncated to exactly max_source_length
            for passage_ids in inputs['input_ids']:
                assert len(passage_ids) == max_source_length

            # replace padding token ids with -100 so they are ignored by the loss
            # (assumes pad_token_id == 0, as for T5-style tokenizers)
            labels = [x if x > 0 else -100 for x in labels]

            output.append(
                InputFeatures(
                    input_ids=inputs['input_ids'], # list of lists
                    attention_mask=inputs['attention_mask'], # list of lists
                    labels=labels, # list
                )
            )

        return output
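
For context, a rough sketch of how the per-example features produced above might be collated into tensors for a Fusion-in-Decoder style model. The collate_fid helper below is illustrative only (it is not part of this file) and assumes every example carries the same number of encoded passages.

    import torch

    def collate_fid(features):
        # features: list of InputFeatures returned by process_one
        input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
        labels = torch.tensor([f.labels for f in features], dtype=torch.long)
        # input_ids / attention_mask: (batch, num_passages, max_source_length)
        # labels: (batch, max_source_length); -100 positions are ignored by the loss
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

A Fusion-in-Decoder encoder typically flattens the first two tensors to (batch * num_passages, max_source_length) before the forward pass and concatenates the per-passage encoder outputs again before decoding.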