in misc/reference_datasets/monolingual/zh/download_mapcc.py [0:0]
def run(self, data, rank: int = 0, world_size: int = 1):
    """Yield one decoded JSON document per line read from ``data``.

    Only rank 0 reads anything; for any other rank the generator finishes
    immediately without yielding.

    Args:
        data: Passed unchanged to ``open_concatenated_gzip_files``
            (presumably the gzip input to read -- confirm against that
            helper).
        rank: Reading happens only when this is 0.
        world_size: Accepted for interface compatibility; not used here.

    Yields:
        The result of ``orjson.loads`` for each line, in stream order.
    """
    if rank != 0:
        # Non-zero ranks produce an empty generator.
        return
    with open_concatenated_gzip_files(data) as f:
        # Iterate the stream directly: the previous ``islice(f, 0, None)``
        # wrapper was a no-op and the ``enumerate`` index was never read.
        for line in f:
            yield orjson.loads(line)