def write_jsonl()

in components/utils/converter.py [0:0]


def write_jsonl(in_bucket_name, in_path, out_bucket_name=None, out_path=None) -> None:
    """Build a JSONL manifest of the blobs under a prefix and upload it.

    Lists every blob in ``in_bucket_name`` whose name starts with ``in_path``.
    For each blob for which ``get_mime_type`` returns a truthy value, one
    record is written holding a document id, the blob's struct data, the MIME
    type and a ``gs://`` URI pointing at the blob. Blobs without a MIME type
    are skipped. The resulting file is uploaded as ``out.jsonl``.

    Args:
        in_bucket_name: Name of the bucket to scan.
        in_path: Prefix used to select blobs in the input bucket.
        out_bucket_name: Name of the bucket that receives the manifest.
            When falsy, the input bucket is used.
        out_path: Object-name prefix for the uploaded manifest. When falsy,
            the manifest is uploaded at the root of the output bucket.
    """
    # Imported locally so the block is self-contained; both are stdlib.
    import os
    import tempfile

    in_bucket = storage_client.bucket(in_bucket_name)
    if out_bucket_name:
        out_bucket = storage_client.bucket(out_bucket_name)
        target_bucket_name = out_bucket_name
    else:
        # No explicit destination: write next to the inputs, and remember the
        # real bucket name so the final message does not print "gs://None/...".
        out_bucket = in_bucket
        target_bucket_name = in_bucket_name

    jsonl_file = "out.jsonl"

    # Avoid producing "None/out.jsonl" when no out_path is given, and avoid a
    # double slash in the object name when out_path ends with "/".
    prefix = out_path.rstrip("/") if out_path else ""
    output_file_path = f"{prefix}/{jsonl_file}" if prefix else jsonl_file

    # Stage the manifest in a private temporary directory rather than the
    # current working directory, so nothing is left behind and concurrent runs
    # do not overwrite each other's file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, jsonl_file)
        with jsonlines.open(local_path, mode="w") as writer:
            for blob in in_bucket.list_blobs(prefix=in_path):
                file_name = blob.name
                mime_type = get_mime_type(file_name)
                if not mime_type:
                    continue
                writer.write(
                    {
                        "id": str(generate_document_id(file_name)),
                        "structData": struct_data(blob),
                        "content": {
                            "mimeType": mime_type,
                            "uri": f"gs://{in_bucket_name}/{file_name}",
                        },
                    }
                )

        # Upload while the temporary directory still exists.
        out_blob = out_bucket.blob(output_file_path)
        out_blob.content_type = "application/json"
        out_blob.upload_from_filename(local_path)

    print(f"JSON files merged and written to gs://{target_bucket_name}/{output_file_path}")