in src/prepdocs/prepdocs.py [0:0]
def update_embeddings_in_batch(sections):
    """Attach an embedding to every section, computing embeddings in batches.

    Sections are grouped into batches bounded by the ``"token_limit"`` and
    ``"max_batch_size"`` entries of ``SUPPORTED_BATCH_AOAI_MODEL`` for
    ``args.openaimodelname``. Each batch is passed to
    ``compute_embedding_in_batch`` in a single call.

    Args:
        sections: Iterable of dicts, each providing at least the ``"id"`` and
            ``"content"`` keys. The dicts are mutated in place.

    Yields:
        Every input section, in input order, with its ``"embedding"`` key
        set. Nothing is yielded until all batches have been computed.

    Raises:
        KeyError: If ``args.openaimodelname`` has no entry in
            ``SUPPORTED_BATCH_AOAI_MODEL``.
    """
    limits = SUPPORTED_BATCH_AOAI_MODEL[args.openaimodelname]
    token_limit = limits["token_limit"]
    max_batch_size = limits["max_batch_size"]

    batch_queue = []
    all_sections = []
    batch_response = {}
    token_count = 0

    def embed_batch(batch, batch_tokens):
        # Compute the embeddings for one batch and index them by section id.
        emb_responses = compute_embedding_in_batch([item["content"] for item in batch])
        if args.verbose:
            print(f"Batch Completed. Batch size {len(batch)} Token count {batch_tokens}")
        for emb, item in zip(emb_responses, batch):
            batch_response[item["id"]] = emb

    for s in sections:
        section_tokens = calculate_tokens_emb_aoai(s["content"])
        fits = (
            token_count + section_tokens <= token_limit
            and len(batch_queue) < max_batch_size
        )
        if not fits:
            # Flush the current batch before starting a new one with this
            # section. The queue is empty when a single section alone exceeds
            # the token limit; skip the call then rather than request
            # embeddings for no input.
            if batch_queue:
                embed_batch(batch_queue, token_count)
            batch_queue = []
            token_count = 0
        batch_queue.append(s)
        token_count += section_tokens
        # Record every section, including the one that triggered a flush, so
        # that none is dropped from the output.
        all_sections.append(s)

    if batch_queue:
        embed_batch(batch_queue, token_count)

    for s in all_sections:
        s["embedding"] = batch_response[s["id"]]
        yield s