def generate_vector_embeddings()

in run/load-embeddings/main.py [0:0]


def generate_vector_embeddings(
    df: pd.DataFrame, *, batch_size: int = 5
) -> pd.DataFrame:
    """Generate the vector embeddings for each chunk of text.

    The input is split into chunks with ``split_product_descriptions``; the
    ``"content"`` of each chunk is sent to the Vertex AI text embedding model
    (``textembedding-gecko@003``), which outputs a 768-dimensional vector for
    each chunk of text. Each vector is stored back on its chunk under the
    ``"embedding"`` key.

    This may take a few minutes to run.

    Args:
        df: Source data handed to ``split_product_descriptions``.
        batch_size: Number of chunks sent to the embedding service per
            request. Defaults to 5, the value that was previously hard-coded
            (presumably chosen to fit the service's per-request limit --
            confirm before raising it).

    Returns:
        A DataFrame built from the chunks, including the new ``"embedding"``
        entry of every chunk.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if the service
            returns a different number of embeddings than texts were sent.
    """
    # Fail fast, before any client initialisation or network traffic.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    aiplatform.init(project=f"{PROJECT_ID}", location=f"{REGION}")
    embeddings_service = VertexAIEmbeddings(
        model_name="textembedding-gecko@003",
    )
    chunked = split_product_descriptions(df)

    for start in range(0, len(chunked), batch_size):
        # Slice once and reuse it for both the request and the write-back.
        batch = chunked[start : start + batch_size]
        request = [chunk["content"] for chunk in batch]
        response = retry_with_backoff(embeddings_service.embed_documents, request)

        # zip() would silently drop the unmatched tail and leave some chunks
        # without an "embedding" key, so reject a mismatched response instead.
        if len(response) != len(batch):
            raise ValueError(
                f"Expected {len(batch)} embeddings for the batch starting at "
                f"index {start}, got {len(response)}"
            )

        # Store the retrieved vector embeddings for each chunk back.
        for chunk, embedding in zip(batch, response):
            chunk["embedding"] = embedding

    # Store the generated embeddings in a pandas dataframe.
    product_embeddings = pd.DataFrame(chunked)
    print(product_embeddings.head())

    return product_embeddings