in gke/load-embeddings/main.py [0:0]
def generate_vector_embeddings(
    df: pd.DataFrame, *, batch_size: int = 5
) -> pd.DataFrame:
    """Generate the vector embeddings for each chunk of text.

    The input is split into chunks by ``split_product_descriptions`` and the
    chunks' ``"content"`` values are sent in batches to the Vertex AI text
    embedding model (``textembedding-gecko@003``), which outputs a
    768-dimensional vector for each chunk of text. Each returned vector is
    stored back on its chunk under the ``"embedding"`` key.
    This may take a few minutes to run.

    Args:
        df: Product data, passed unchanged to ``split_product_descriptions``.
        batch_size: Number of chunks sent per embedding request. Defaults to
            5, the value that was previously hard-coded.

    Returns:
        A DataFrame built from the chunk records, each one extended with its
        ``"embedding"`` entry.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if the embedding
            service returns a different number of vectors than the number of
            chunks sent in a batch.
    """
    # Validate up front: a negative step would make range() empty and return
    # a frame with no embeddings at all, without any error.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )

    aiplatform.init(project=f"{PROJECT_ID}", location=f"{REGION}")
    embeddings_service = VertexAIEmbeddings(
        model_name="textembedding-gecko@003",
    )
    chunked = split_product_descriptions(df)

    for start in range(0, len(chunked), batch_size):
        # Slice once and reuse it for both the request and the write-back.
        batch = chunked[start : start + batch_size]
        request = [chunk["content"] for chunk in batch]
        response = list(
            retry_with_backoff(embeddings_service.embed_documents, request)
        )

        # zip() would silently drop the tail on a short response, leaving
        # chunks without an "embedding" key; fail loudly instead.
        if len(response) != len(batch):
            raise ValueError(
                f"Expected {len(batch)} embeddings for the batch starting at "
                f"index {start}, got {len(response)}"
            )

        # Store the retrieved vector embeddings for each chunk back.
        for chunk, embedding in zip(batch, response):
            chunk["embedding"] = embedding

    # Store the generated embeddings in a pandas dataframe.
    product_embeddings = pd.DataFrame(chunked)
    print(product_embeddings.head())
    return product_embeddings