in src/jobs/tune_bart.py [0:0]
def preprocess_function(self, examples):
    """Tokenize source/target text and attach loss-ready labels.

    Args:
        examples: Mapping with ``"input_text"`` and ``"target_text"``
            entries. Presumably a batch (lists of strings) as passed by
            ``datasets.Dataset.map(..., batched=True)``; a single example
            (plain strings) is handled as well -- verify against caller.

    Returns:
        The tokenizer output for the inputs, with an extra ``"labels"``
        entry holding the target token ids. Padding positions in the
        labels are set to ``-100`` so they are ignored by the loss.
    """
    # Value skipped by PyTorch cross-entropy (its default ``ignore_index``).
    ignore_index = -100

    model_inputs = self.tokenizer(
        examples["input_text"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    labels = self.tokenizer(
        examples["target_text"],
        max_length=512,
        truncation=True,
        padding="max_length",
    )

    label_ids = labels["input_ids"]
    pad_id = getattr(self.tokenizer, "pad_token_id", None)
    if pad_id is not None:
        # Targets are padded to max_length; without masking, every pad
        # position would be scored as a real target token in the loss.
        if label_ids and isinstance(label_ids[0], int):
            # Single (non-batched) example: flat list of token ids.
            label_ids = [
                tok if tok != pad_id else ignore_index for tok in label_ids
            ]
        else:
            label_ids = [
                [tok if tok != pad_id else ignore_index for tok in seq]
                for seq in label_ids
            ]

    model_inputs["labels"] = label_ids
    return model_inputs