in fulltext_search/search_sharded.py [0:0]
def run_query(query, n_pages):
    """POST a search request to the local search endpoint and return its hits.

    The request targets the ``fineweb`` index at
    ``http://127.0.0.1:9308/search``. The call blocks until a 200 response
    is received: any other status code, or a
    ``requests.exceptions.ConnectionError``, is reported on stderr and the
    request is retried after a 5 second pause, with no retry limit. Any
    other exception propagates to the caller.

    Args:
        query: JSON-serialisable query object sent as the ``query`` field.
        n_pages: Value sent as the ``size`` field of the request.

    Returns:
        The value stored under ``["hits"]["hits"]`` in the decoded JSON
        response body.
    """
    # The request body does not change between attempts, so serialise once.
    payload = json.dumps(
        {
            "index": "fineweb",
            "size": n_pages,
            "query": query,
            "max_matches": 4_000,
        }
    )

    while True:
        try:
            reply = requests.post(
                "http://127.0.0.1:9308/search",
                data=payload,
                timeout=1000,
            )
            if reply.status_code == 200:
                return reply.json()["hits"]["hits"]
            # Non-200: log the server's message and fall through to retry.
            print(reply.text, file=sys.stderr)
        except requests.exceptions.ConnectionError as err:
            print(err, file=sys.stderr)
        # Shared back-off for both failure paths before the next attempt.
        time.sleep(5)