in skills/classification/evaluation/vectordb.py [0:0]
def load_data(self, data):
    """Populate the in-memory vector store from *data*, embedding if needed.

    Resolution order:
      1. If ``self.embeddings`` and ``self.metadata`` are both non-empty,
         do nothing.
      2. Else, if a file exists at ``self.db_path``, delegate to
         ``self.load_db()``.
      3. Else, embed every item's ``"text"`` field via ``self.client.embed``
         in batches, store the results, and persist them.

    Args:
        data: Iterable of mappings, each with a ``"text"`` key. It may be a
            one-shot iterator (e.g. a generator); it is materialized once.
            The items themselves are kept as the metadata entries.

    Returns:
        None. Results are stored on ``self.embeddings`` and ``self.metadata``.
    """
    # Check if the vector database is already loaded
    if self.embeddings and self.metadata:
        print("Vector database is already loaded. Skipping data loading.")
        return

    # Check if the on-disk database exists and reuse it instead of re-embedding
    if os.path.exists(self.db_path):
        print("Loading vector database from disk.")
        self.load_db()
        return

    # Materialize once: `data` is walked twice below (texts + metadata), and
    # a generator would otherwise be exhausted after the first pass, leaving
    # metadata empty and out of sync with the embeddings.
    items = list(data)
    texts = [item["text"] for item in items]

    # Embed in fixed-size batches so more than 128 documents can be handled.
    batch_size = 128
    batches = [
        self.client.embed(
            texts[start : start + batch_size],
            model="voyage-2",
        ).embeddings
        for start in range(0, len(texts), batch_size)
    ]

    # Flatten the per-batch results into one list aligned with `items`.
    self.embeddings = [embedding for batch in batches for embedding in batch]
    self.metadata = items

    # Save the vector database to disk so the next run takes the
    # `os.path.exists(self.db_path)` branch above instead of re-embedding.
    # NOTE(review): assumes the class defines `save_db()` as the counterpart
    # of `load_db()` — confirm against the rest of the class.
    self.save_db()
    print("Vector database loaded and saved.")