def insert_batch()

in fulltext_search/index_docs.py [0:0]


def insert_batch(batch):
    """Send one batch of documents to the local bulk-insert endpoint.

    Every row of ``batch`` becomes one ``insert`` action in an NDJSON payload
    that is POSTed to ``http://127.0.0.1:9308/bulk``. The whole batch goes
    into a single index named ``fineweb<N>``, with ``N`` drawn at random from
    0-63 on each call.

    Args:
        batch: Mapping with parallel sequences under the keys ``"text"``,
            ``"id"``, ``"url"``, ``"language_score"`` and ``"token_count"``.
            Rows are paired with ``zip``, so iteration stops at the shortest
            sequence.

    Returns:
        ``{"response": [status_code]}``, where ``status_code`` is the HTTP
        status of the bulk request. The status is not inspected here, so
        callers must check it if rejected batches matter to them.
    """
    index_name = f"fineweb{random.randint(0, 63)}"

    # Collect the NDJSON lines and join once instead of growing a string
    # with ``+=`` on every row.
    lines = []
    for text, _id, url, language_score, token_count in zip(
        batch["text"],
        batch["id"],
        batch["url"],
        batch["language_score"],
        batch["token_count"],
    ):
        # Keep only the last ":"-separated segment of the id and trim any
        # ">" characters from its ends; the same value serves as both the
        # document id and the stored ``fw_id`` field.
        fw_id = _id.split(":")[-1].strip(">")
        action = {
            "insert": {
                "index": index_name,
                "_id": fw_id,
                "doc": {
                    "content": text,
                    "fw_id": fw_id,
                    "url": url,
                    "language_score": language_score,
                    "token_count": token_count,
                },
            }
        }
        lines.append(json.dumps(action) + "\n")
    ndjson = "".join(lines)

    # Retry indefinitely while the server is unreachable; only connection
    # errors are retried, any other exception propagates to the caller.
    # NOTE(review): no request timeout is set, so a stalled server can block
    # this call - confirm that is acceptable for the indexing job.
    response = None
    while response is None:
        try:
            response = requests.post(
                "http://127.0.0.1:9308/bulk",
                headers={"Content-Type": "application/x-ndjson"},
                data=ndjson,
            )
        except requests.exceptions.ConnectionError as e:
            print(e, file=sys.stderr)
            time.sleep(1)

    return {"response": [response.status_code]}