in weak_to_strong/datasets.py [0:0]
def process_function(res):
    """Tokenize one record's text and keep only its token ids.

    Args:
        res: Subscriptable record; its ``"txt"`` entry is passed to
            ``tokenizer``.

    Returns:
        A dict with the single key ``"input_ids"``, taken from the
        ``"input_ids"`` entry of the tokenizer output.
    """
    # `tokenizer` is not defined in this block; it is resolved from the
    # surrounding scope at call time.
    encoded = tokenizer(res["txt"])
    return {"input_ids": encoded["input_ids"]}