in evaluation_pipeline/retrieval.py [0:0]
def create_embeddings_table_in_vector_db(db, model_name, embeddings_sizes, embeddings_dict):
    """Create a ``vec0`` virtual table for one model and fill it with its embeddings.

    The table is named ``vec_items_<model>``, where ``<model>`` is
    ``model_name`` with every ``/``, ``-`` and ``.`` replaced by ``_``.
    Each embedding is stored under a ``rowid`` equal to its position in
    ``embeddings_dict[model_name]`` (starting at 0).

    Args:
        db: Connection object exposing ``execute`` and ``executemany`` and
            usable as a context manager. Presumably a ``sqlite3.Connection``
            with the sqlite-vec extension already loaded -- confirm against
            the caller.
        model_name: Key looked up in both ``embeddings_sizes`` and
            ``embeddings_dict``; also used to derive the table name.
        embeddings_sizes: Mapping from model name to the embedding dimension
            declared in the table's ``float[N]`` column.
        embeddings_dict: Mapping from model name to an iterable of vectors;
            each vector is converted with ``list()`` and serialized with
            ``serialize_f32`` before insertion.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        KeyError: If ``model_name`` is missing from either mapping. Both
            lookups happen before any SQL is executed.
    """
    print("creating table")
    # Resolve both lookups up front so a missing model fails before the table exists.
    embedding_size = embeddings_sizes[model_name]
    vectors = embeddings_dict[model_name]

    # NOTE(review): identifiers cannot be bound as SQL parameters, so the table
    # name is interpolated into the statement. Only '/', '-' and '.' are
    # rewritten; model_name must come from a trusted source.
    model_name_normalized = model_name.replace("/", "_").replace("-", "_").replace(".", "_")
    table_name = f"vec_items_{model_name_normalized}"

    db.execute(f"CREATE VIRTUAL TABLE {table_name} USING vec0(embedding float[{embedding_size}])")

    # Stream rows lazily instead of materializing a second copy of every vector.
    rows = (
        (rowid, serialize_f32(list(vec)))
        for rowid, vec in enumerate(vectors)
    )
    # For a sqlite3 connection the context manager commits the inserts on
    # success and rolls them back if an exception escapes the block.
    with db:
        db.executemany(
            f"INSERT INTO {table_name}(rowid, embedding) VALUES (?, ?)",
            rows,
        )
    return db