in training/scripts/train.py [0:0]
def preprocess_function(examples):
    """Tokenize the ``"review"`` field of ``examples``.

    Looks up ``examples["review"]`` and passes it to ``tokenizer`` (a name
    resolved from the enclosing scope, not defined in this function) with
    ``padding="max_length"`` and ``truncation=True``.

    Args:
        examples: Any object subscriptable by the key ``"review"``.
            NOTE(review): presumably a batch of rows handed over by a
            dataset ``map`` call -- confirm against the caller.

    Returns:
        Whatever ``tokenizer`` returns for that input, passed through
        unchanged.
    """
    reviews = examples["review"]
    encoded = tokenizer(reviews, padding="max_length", truncation=True)
    return encoded