in filtering/deduplication/suffix_dedup.py [0:0]
def generator_from_dataset(dataset):
    """Lazily yield the ``"text"`` entry of every item in *dataset*.

    Args:
        dataset: Any iterable whose items support ``item["text"]`` lookup.

    Yields:
        ``item["text"]`` for each item, in iteration order.

    Items are consumed one at a time as the generator is advanced, so the
    dataset is never materialized here. A failed ``"text"`` lookup is not
    caught: the exception propagates to the caller when that item is reached.
    """
    # A compound statement cannot share a line with the ``def`` header, so
    # the loop lives in a properly indented body.
    for item in dataset:
        yield item["text"]