in 4-mmrag_tooluse/mmrag_bh.py [0:0]
def prepare_data_for_indexing(self, image_categorizations):
    """Build embeddings and payloads for every non-table image entry.

    Entries whose ``'image_category'`` equals ``'table'`` are skipped. For
    the remaining entries, ``get_embedding`` is called once per
    ``'content_output'`` value, and a payload dict is assembled from a
    fixed set of keys copied out of each entry.

    Args:
        image_categorizations: Iterable of entries supporting both
            ``.get(key)`` and ``entry[key]`` (presumably plain dicts --
            confirm against the caller).

    Returns:
        ``(embeddings, payloads)``: two lists in the same order as the
        retained entries. ``(None, None)`` when no non-table entry
        remains; an info message is logged in that case.

    Raises:
        KeyError: Assuming dict entries, when a retained entry lacks one
            of the subscripted keys.

    NOTE(review): the filter reads ``'image_category'`` with ``.get``, so
    an entry missing that key is retained, yet the payload step subscripts
    the same key. With dict entries such an entry raises ``KeyError`` only
    after the embeddings have been computed.
    """
    # Order matters: it fixes both the payload key order and the order in
    # which a missing key would be reported.
    payload_fields = (
        'image_category',
        'content_output',
        'quarter_info',
        'base64_image_path',
        'original_pdf_path',
    )

    retained = []
    for entry in image_categorizations:
        if entry.get('image_category') != 'table':
            retained.append(entry)

    if len(retained) == 0:
        logger.info("No non-table images to process.")
        return None, None

    # Collect every description before embedding anything, so a missing
    # 'content_output' surfaces before any get_embedding call is made.
    descriptions = [entry['content_output'] for entry in retained]
    vectors = list(map(get_embedding, descriptions))

    metadata = []
    for entry in retained:
        metadata.append({field: entry[field] for field in payload_fields})

    return vectors, metadata