def get_bounding_boxes()

in overlayer/textractoverlayer/t_overlay.py [0:0]


def get_bounding_boxes(textract_json: dict, overlay_features: List[Textract_Types],
                       document_dimensions: DocumentDimensions) -> List[BoundingBox]:
    """Collect bounding boxes for the requested feature types of a Textract response.

    The response is parsed with ``trp.Document`` and walked page by page. Within
    each page, boxes are appended in this order:

    1. lines - for every line, its LINE box (if requested) followed by the
       WORD boxes of that line (if requested);
    2. form fields - for every field, its KEY box then its VALUE box;
    3. tables - for every table, its TABLE box followed by its CELL boxes.

    Args:
        textract_json: Textract response, handed to ``trp.Document`` as-is.
        overlay_features: Feature types to emit boxes for. ``FORM`` selects
            both KEY and VALUE boxes; ``KEY`` / ``VALUE`` select only their own.
        document_dimensions: Passed through unchanged to every ``BoundingBox``.

    Returns:
        The boxes found, each tagged with a 1-based ``page_number`` that follows
        the iteration order of ``doc.pages``.
    """
    doc = trp.Document(textract_json)

    def requested(*feature_types) -> bool:
        # Pure membership test: does not depend on the truthiness of the
        # enum members themselves, and builds no intermediate list.
        return any(feature in overlay_features for feature in feature_types)

    # The selection never changes while walking the document, so resolve it
    # once instead of re-scanning overlay_features for every line/field/table.
    want_lines = requested(Textract_Types.LINE)
    want_words = requested(Textract_Types.WORD)
    want_keys = requested(Textract_Types.FORM, Textract_Types.KEY)
    want_values = requested(Textract_Types.FORM, Textract_Types.VALUE)
    want_tables = requested(Textract_Types.TABLE)
    want_cells = requested(Textract_Types.CELL)

    bounding_box_list: List[BoundingBox] = []

    def add_box(element, box_type, page_number: int, text) -> None:
        # Every overlay element exposes .geometry and .confidence; only the
        # box type and the label text differ between the call sites.
        bounding_box_list.append(
            BoundingBox(geometry=element.geometry,
                        document_dimensions=document_dimensions,
                        box_type=box_type,
                        page_number=page_number,
                        confidence=element.confidence,
                        text=text))

    for page_number, page in enumerate(doc.pages, start=1):
        if want_lines or want_words:
            for line in page.lines:
                if want_lines and line:
                    add_box(line, Textract_Types.LINE, page_number, line.text)
                if want_words:
                    for word in line.words:
                        if word:
                            add_box(word, Textract_Types.WORD, page_number, word.text)

        if want_keys or want_values:
            for field in page.form.fields:
                # A field may lack a key or a value; emit only what is present.
                if want_keys and field and field.key:
                    add_box(field.key, Textract_Types.KEY, page_number, field.key.text)
                if want_values and field and field.value:
                    add_box(field.value, Textract_Types.VALUE, page_number, field.value.text)

        if want_tables or want_cells:
            for table in page.tables:
                if want_tables:
                    # Tables and cells get a fixed label rather than their content.
                    add_box(table, Textract_Types.TABLE, page_number, "table")
                if want_cells:
                    for row in table.rows:
                        for cell in row.cells:
                            if cell:
                                add_box(cell, Textract_Types.CELL, page_number, "cell")

    return bounding_box_list