def _figure_area()

in chunking/chunkers/multimodal_chunker.py [0:0]


    def _figure_area(self, figure: Dict, pages: List[Dict]) -> float:
        """
        Calculate the total figure area by summing the areas of all bounding regions across pages.
        
        Args:
            figure (Dict): A dictionary representing the figure with 'boundingRegions', 
                        where each bounding region contains 'pageNumber' and 'polygon'.
            pages (List[Dict]): A list of page dictionaries each containing 'pageNumber', 'width', and 'height'.
        
        Returns:
            float: The total area of all valid bounding regions across pages.
                Returns 0.0 if no valid bounding regions are found or an error occurs.
        """
        total_area = 0.0

        # Ensure 'boundingRegions' exists in the figure
        bounding_regions = figure.get('boundingRegions', [])
        if not bounding_regions:
            logging.warning(f"[multimodal_chunker][{self.filename}] No boundingRegions found in figure.")
            return total_area  # Returns 0.0

        # Create a lookup dictionary for pages to optimize performance
        page_lookup = {page['pageNumber']: page for page in pages}

        for idx, bounding_region in enumerate(bounding_regions, start=1):
            try:
                # Extract bounding region details
                page_number = bounding_region['pageNumber']
                polygon = bounding_region['polygon']
            except KeyError as e:
                logging.error(f"[multimodal_chunker][{self.filename}] Bounding region {idx} is missing key: {e}")
                continue  # Skip this bounding region

            # Find the corresponding page using the lookup dictionary
            page = page_lookup.get(page_number)
            if not page:
                logging.info(f"[multimodal_chunker][{self.filename}] No matching page found for pageNumber: {page_number} in bounding region {idx}.")
                continue  # Skip this bounding region

            page_width = page.get('width')
            page_height = page.get('height')

            # Validate page dimensions
            if page_width is None or page_height is None:
                logging.error(f"[multimodal_chunker][{self.filename}] Page {page_number} is missing 'width' or 'height'.")
                continue  # Skip this bounding region

            try:
                # Calculate polygon area using a helper method
                polygon_area = self._calculate_polygon_area(polygon)
            except ValueError as ve:
                logging.error(f"[multimodal_chunker][{self.filename}] Error calculating area for figure on page {page_number}, bounding region {idx}: {ve}")
                continue  # Skip this bounding region

            # Optionally, validate that the polygon area does not exceed the page area
            page_area = page_width * page_height
            if polygon_area > page_area:
                logging.warning(
                    f"[multimodal_chunker][{self.filename}] Polygon area {polygon_area:.2f} exceeds page area {page_area:.2f} on page {page_number}, bounding region {idx}."
                )
                # Depending on requirements, we might choose to:
                # - Skip adding this area
                # - Cap the polygon area to the page area
                # - Include the area as is (current implementation)
                # Here, we'll include it.

            # Accumulate the total area
            total_area += polygon_area

            logging.debug(
                f"[multimodal_chunker][{self.filename}] Figure on Page {page_number}, Bounding Region {idx}: "
                f"Polygon Coordinates: {polygon}, "
                f"Polygon Area: {polygon_area:.2f}, "
                f"Accumulated Total Area: {total_area:.2f}"
            )

        if total_area == 0.0:
            logging.warning(f"[multimodal_chunker][{self.filename}] No valid bounding regions found to calculate total area.")

        return total_area