def load_jsonl_fast()

in paq/paq_utils.py [0:0]


def load_jsonl_fast(fi):
    """Load a JSON Lines file into a list of parsed objects.

    The whole file is read into memory in a single call and then split on
    newline characters; lines that are empty or whitespace-only are skipped.
    Every remaining line is parsed with ``json.loads``.

    Args:
        fi: Path of the jsonl file to read. It is opened as UTF-8 text.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
        An empty (or blank-only) file yields an empty list.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    logging.info(f'Loading {fi}')

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(fi, encoding='utf-8') as f:
        txt = f.read()
    logging.info(f'{fi} Loaded, splitting into lines...')

    # Split on '\n' only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear unescaped inside a JSON string.
    lines = [t for t in txt.split('\n') if t.strip()]
    logging.info(f'Parsing {len(lines)} items from jsonl:')

    results = []
    for ln, line in enumerate(lines):
        results.append(json.loads(line))
        # Progress report on the first item and then every millionth one.
        if ln % 1000000 == 0:
            logging.info(f'Loaded {ln + 1} Items from {fi}')

    # Report len(results) rather than the loop index, which is never bound
    # when the file contains no items.
    logging.info(f'Loaded {len(results)} Items from {fi}')
    return results