def process_one(example)

in src/utils_data.py

Converts a single source/target example into fixed-length, padded token id sequences: input ids, attention mask, and loss-masked labels.


    def process_one(example):

        # Tokenize the source text and pad/truncate it to exactly max_source_length tokens.
        inputs = tokenizer.encode_plus(
            example.source,
            max_length=max_source_length,
            add_special_tokens=True,
            padding='max_length',
            truncation='longest_first',
            # return_tensors="pt"
        )
        # Tokenize the target text the same way, padded/truncated to max_target_length.
        labels = tokenizer.encode(
            example.target,
            max_length=max_target_length,
            add_special_tokens=True,
            padding='max_length',
            truncation='longest_first',
            # return_tensors="pt"
        )

        # Sanity-check that padding/truncation produced fixed-length sequences.
        assert len(inputs['input_ids']) == max_source_length
        assert len(labels) == max_target_length

        # Log the first few examples (guid suffix 0-4) for manual inspection.
        if int(example.guid.split('-')[-1]) < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s" % (example.guid))
            logger.info("input_ids: %s" % " ".join([str(x) for x in inputs["input_ids"]]))
            logger.info("attention_mask: %s" % " ".join([str(x) for x in inputs["attention_mask"]]))
            logger.info("input_tokens: %s" % tokenizer.decode(inputs['input_ids']))
            logger.info("labels: %s" % tokenizer.decode(labels))

        # Replace padding positions with -100 so they are ignored by the loss;
        # this assumes the tokenizer's pad token id is 0.
        labels = [x if x > 0 else -100 for x in labels]

        return InputFeatures(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            labels=labels,
            )
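
The names `tokenizer`, `max_source_length`, `max_target_length`, `logger`, and `InputFeatures` come from the enclosing scope, which is not shown here. A minimal usage sketch, assuming `examples` is an iterable of objects with `.guid`, `.source`, and `.target` attributes (as `process_one` expects), showing how the returned features might be stacked into tensors:

    import torch

    # Run the conversion over every example (hypothetical driver loop).
    features = [process_one(ex) for ex in examples]

    # Stack the fixed-length per-example lists into long tensors for a seq2seq model.
    batch = {
        "input_ids": torch.tensor([f.input_ids for f in features], dtype=torch.long),
        "attention_mask": torch.tensor([f.attention_mask for f in features], dtype=torch.long),
        "labels": torch.tensor([f.labels for f in features], dtype=torch.long),
    }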