src/train_intent.py
def _tokenize_function(self, examples):
    # Pull the raw text column for this batch.
    text = examples["sequence"]
    # Truncate from the right so the start of each sequence is preserved.
    self.tokenizer.truncation_side = "right"
    # Tokenize as PyTorch tensors, truncating to at most 64 tokens and
    # padding every sequence to the longest one in the batch.
    tokenized_inputs = self.tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=64,
    )
    return tokenized_inputs
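
Below is a minimal, self-contained usage sketch, not taken from the source repo: it assumes self.tokenizer is a Hugging Face tokenizer loaded via AutoTokenizer, and that batches carry their text under the "sequence" key as in the method above. The wrapper class _Demo, the distilbert-base-uncased checkpoint, and the example utterances are all placeholders for illustration.

from transformers import AutoTokenizer

class _Demo:
    """Hypothetical stand-in for the class that owns _tokenize_function."""

    def __init__(self):
        # Checkpoint name is an assumption; the repo may use a different model.
        self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    def _tokenize_function(self, examples):
        text = examples["sequence"]
        self.tokenizer.truncation_side = "right"
        return self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=64,
        )

demo = _Demo()
batch = demo._tokenize_function(
    {"sequence": ["turn on the lights", "what's the weather tomorrow"]}
)
print(batch["input_ids"].shape)  # torch.Size([2, L]) with L <= 64

Note that with padding=True the batch is padded dynamically to its longest sequence rather than to max_length; max_length=64 only caps truncation. Pass padding="max_length" instead if fixed-width batches are required.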