def prepare_data_for_indexing()

in 4-mmrag_tooluse/mmrag_bh.py [0:0]


    def prepare_data_for_indexing(self, image_categorizations):
        """Build embeddings and payloads for every non-table image.

        Entries whose 'image_category' equals 'table' are skipped; entries
        that lack the key altogether are kept (they are not tables).

        Args:
            image_categorizations: Iterable of dict-like items. Each kept
                item must provide 'content_output', 'quarter_info',
                'base64_image_path' and 'original_pdf_path';
                'image_category' is optional.

        Returns:
            A tuple ``(embeddings, payloads)`` of two lists in the same
            order as the kept items: ``embeddings[i]`` is the result of
            ``get_embedding`` for ``payloads[i]['content_output']``.
            Returns ``(None, None)`` when there is nothing to index.

        Raises:
            KeyError: If a kept item is missing one of the required keys.
        """
        non_table_images = [
            item for item in image_categorizations
            if item.get('image_category') != 'table'
        ]
        if not non_table_images:
            logger.info("No non-table images to process.")
            return None, None

        # Build the payloads first: a missing required key then raises
        # KeyError before any get_embedding call is made, instead of after
        # every embedding has already been computed.
        payloads = [
            {
                # .get() mirrors the filter above, which already treats a
                # missing category as "not a table"; indexing with [] here
                # would crash on exactly the items the filter let through.
                "image_category": item.get('image_category'),
                "content_output": item['content_output'],
                "quarter_info": item['quarter_info'],
                "base64_image_path": item['base64_image_path'],
                "original_pdf_path": item['original_pdf_path'],
            }
            for item in non_table_images
        ]
        embeddings = [
            get_embedding(payload["content_output"]) for payload in payloads
        ]
        return embeddings, payloads