in weak_to_strong/datasets.py [0:0]
def tokenize_dataset( raw_ds: HfDataset, tokenizer: Callable, max_ctx: int,