in run/load-embeddings/main.py [0:0]
def split_product_descriptions(df: pd.DataFrame):
    """Break each product description into chunks, one record per chunk.

    Every row of ``df`` is read for its ``product_id`` and ``description``
    columns. The description is passed to a ``RecursiveCharacterTextSplitter``
    configured with ``"."`` and ``"\\n"`` as separators, a chunk size of 500
    (measured with ``len``) and no overlap between chunks.

    Args:
        df: Frame containing ``product_id`` and ``description`` columns.

    Returns:
        A flat list of dicts, each of the form
        ``{"product_id": <row's product_id>, "content": <chunk text>}``,
        ordered by row and then by chunk position within the row.
    """
    splitter = RecursiveCharacterTextSplitter(
        separators=[".", "\n"],
        chunk_size=500,
        chunk_overlap=0,
        length_function=len,
    )

    records = []
    for _, item in df.iterrows():
        pid = item["product_id"]
        documents = splitter.create_documents([item["description"]])
        # One output record per chunk, all tagged with the owning product.
        records.extend(
            {"product_id": pid, "content": doc.page_content}
            for doc in documents
        )
    return records