def create_embeddings_table_in_vector_db()

in evaluation_pipeline/retrieval.py [0:0]


def create_embeddings_table_in_vector_db(db, model_name, embeddings_sizes, embeddings_dict):
    """Create a ``vec0`` virtual table for one model and fill it with its embeddings.

    The table is named ``vec_items_<model>``, where ``<model>`` is
    ``model_name`` with every ``/``, ``-`` and ``.`` replaced by ``_``.
    Each vector is stored under a ``rowid`` equal to its position in
    ``embeddings_dict[model_name]``.

    Args:
        db: Database connection. It must provide ``execute`` and
            ``executemany`` and be usable as a context manager (presumably a
            ``sqlite3.Connection`` with the sqlite-vec extension loaded --
            confirm against the caller).
        model_name: Key used to look up both ``embeddings_sizes`` and
            ``embeddings_dict``; also the basis of the table name.
        embeddings_sizes: Mapping from model name to embedding dimension.
        embeddings_dict: Mapping from model name to an iterable of vectors.

    Returns:
        The same ``db`` object that was passed in.

    Raises:
        KeyError: If ``model_name`` is missing from either mapping (assuming
            plain ``dict`` arguments).
    """
    print("creating table")

    # Resolve both lookups up front so a missing model fails before any DDL runs.
    embedding_size = embeddings_sizes[model_name]
    vectors = embeddings_dict[model_name]

    # Keep this normalization unchanged: the resulting name is the only handle
    # other code has on the table.
    table_name = "vec_items_" + model_name.replace("/", "_").replace("-", "_").replace(".", "_")
    db.execute(f"CREATE VIRTUAL TABLE {table_name} USING vec0(embedding float[{embedding_size}])")

    insert_sql = f"INSERT INTO {table_name}(rowid, embedding) VALUES (?, ?)"
    # Stream rows lazily instead of materializing a second copy of every
    # embedding; serialize_f32 presumably packs the floats into the blob
    # format vec0 expects (defined outside this block -- verify).
    rows = ((idx, serialize_f32(list(vec))) for idx, vec in enumerate(vectors))

    # The connection context manager wraps all inserts in one transaction.
    with db:
        db.executemany(insert_sql, rows)
    return db