# process_file — from collection/paragraph_chunker.py


def process_file(tup: Tuple[str, str, Path]) -> None:
    """Chunk all documents in a single JSONL file.

    Reads one document per line from *input_file*, splits each document's
    ``contents`` into passages via ``chunk_doc``, and writes one JSON line
    per passage to the mirrored location under the output directory.

    Args:
        tup: ``(input_directory, output_directory, input_file)``. The output
            path is *input_file* with its leading *input_directory* prefix
            swapped for *output_directory*.

    Raises:
        ValueError: if *input_file* is not located under *input_directory*.
    """
    input_directory, output_directory, input_file = tup
    # Swap only the leading directory prefix. The previous str.replace()
    # approach would also rewrite any later occurrence of the
    # input-directory substring inside the path.
    relative_path = Path(input_file).relative_to(input_directory)
    output_path = Path(output_directory) / relative_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 so JSONL round-trips identically on every platform
    # instead of depending on the locale's default encoding.
    with open(input_file, encoding='utf-8') as f1, \
         open(output_path, 'w', encoding='utf-8') as f2:
        for jsonl in f1:
            doc = json.loads(jsonl)
            passages = chunk_doc(doc['contents'])

            for i, passage in enumerate(passages):
                # Passage ids extend the parent doc id: "<doc_id>_p<index>".
                paragraph = {'id': f"{doc['id']}_p{i}", 'contents': passage}

                f2.write(json.dumps(paragraph) + '\n')