in paq/paq_utils.py [0:0]
def load_jsonl_fast(fi) -> list:
    """Load a JSON Lines file into a list of parsed objects.

    The whole file is read into memory in a single call and then split on
    newline characters; lines that are empty or whitespace-only are skipped.

    Args:
        fi: Path of the JSONL file to read (anything ``open`` accepts).

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
        An empty list is returned when the file has no non-blank lines.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    logging.info(f'Loading {fi}')
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(fi, encoding='utf-8') as f:
        txt = f.read()
    logging.info(f'{fi} Loaded, splitting into lines...')
    # Split on '\n' only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear unescaped inside JSON strings.
    lines = [t for t in txt.split('\n') if t.strip() != '']
    logging.info(f'Parsing {len(lines)} items from jsonl:')

    results = []
    for ln, line in enumerate(lines):
        results.append(json.loads(line))
        # Periodic progress report for very large files.
        if ln % 1000000 == 0:
            logging.info(f'Loaded {ln + 1} Items from {fi}')

    # Use len(results) rather than the loop variable: the loop variable is
    # unbound when the file contains no non-blank lines.
    logging.info(f'Loaded {len(results)} Items from {fi}')
    return results