def process_folder()

in 4-mmrag_tooluse/mmrag_bh.py [0:0]


def process_folder(folder: str, base64_output_folder: str) -> List[Dict[str, str]]:
    """
    Convert every quarter-tagged PDF in a folder into saved base64 page images.

    Only directory entries whose name ends with ".pdf" are considered. The
    quarter tag is the first substring of the file name matching
    ``Q[1-4]\\d{2}`` (for example "Q323"). PDFs whose name carries no such tag
    are skipped with a warning.

    Entries are visited in sorted file-name order so the returned list is
    reproducible across runs and platforms.

    Args:
        folder: Directory that is listed (non-recursively) for PDF files.
        base64_output_folder: Destination passed to ``save_base64_image`` for
            each page's base64 text file.

    Returns:
        One dict per image produced by ``pdf_to_base64_images``, with keys:
        ``'quarter_info'`` (the matched quarter tag), ``'base64_image_path'``
        (the value returned by ``save_base64_image``) and
        ``'original_pdf_path'`` (``folder`` joined with the PDF file name).
        Records are grouped by PDF in sorted file-name order, and within a PDF
        follow the order in which ``pdf_to_base64_images`` yields images.
    """
    images_data: List[Dict[str, str]] = []
    quarter_pattern = r'Q[1-4]\d{2}'

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # output order deterministic.
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".pdf"):
            continue

        match = re.search(quarter_pattern, file)
        if not match:
            logger.warning(
                f"No quarter information found in filename: {file}")
            continue

        quarter_info = match.group()
        pdf_path = os.path.join(folder, file)
        stem = os.path.splitext(file)[0]

        # NOTE(review): presumably one base64 string per PDF page -- confirm
        # against pdf_to_base64_images. The index in the file name is
        # zero-based.
        for i, base64_image in enumerate(pdf_to_base64_images(pdf_path)):
            base64_filename = f"{stem}_page_{i}.txt"
            base64_path = save_base64_image(
                base64_image, base64_output_folder, base64_filename)
            images_data.append({
                'quarter_info': quarter_info,
                'base64_image_path': base64_path,
                'original_pdf_path': pdf_path
            })
    return images_data