def __init__()

in tpipelinegeofinder/textractgeofinder/tword.py [0:0]


    def __init__(
            self,
            text: str = None,
            original_text: str = None,
            text_type: str = None,    # word, line or phrase
            confidence: float = None,
            id: str = None,
            xmin: int = None,
            ymin: int = None,
            xmax: int = None,
            ymax: int = None,
            page_number: int = None,
            ocrdb_row=None,
            trp_word: trpWord = None,
            doc_width: int = None,
            doc_height: int = None,
            child_relationships: str = "",
            reference: str = None,
            resolver: str = None):
        """
        Initialise the instance from exactly one of three sources.

        Sources (mutually exclusive, selected by truthiness):

        * ``text`` -- explicit values. ``text_type``, ``id``, ``confidence``,
          ``xmin``/``ymin``/``xmax``/``ymax``, ``page_number``, ``doc_width``
          and ``doc_height`` are then required. ``text`` and ``text_type``
          are stored lower-cased. ``original_text``, ``resolver`` and
          ``reference`` are only set on the instance when truthy.
        * ``ocrdb_row`` -- an indexable row; indexes 1..14 are read as
          page_number, text_type, text, original_text, confidence, xmin,
          ymin, xmax, ymax, id, doc_width, doc_height, child_relationships,
          reference (index 0 is not read). Values are stored as-is.
        * ``trp_word`` -- a parsed Textract object. ``doc_width``,
          ``doc_height`` and ``page_number`` must also be passed; the
          bounding box ratios are multiplied by the document dimensions
          and rounded to obtain xmin/ymin/xmax/ymax.

        text_type: word, line or phrase
        resolver: textract, tquery, table, forms

        Raises:
            ValueError: if zero or more than one source is given, if a
                required value is missing for the ``text`` source, or if
                ``doc_width``/``doc_height``/``page_number`` are missing
                for the ``trp_word`` source.
        """

        # Exactly one source must be supplied; empty values count as "not given".
        sources_given = sum(1 for source in (text, ocrdb_row, trp_word) if source)
        if sources_given > 1:
            raise ValueError("Can only take one of text, trp_word or ocrdb_row.")
        if sources_given == 0:
            raise ValueError("Have to pass in one of text, trp_word or ocrdb_row.")

        if text:
            # Validate everything first so the error lists all missing values.
            missing_values: List[str] = []
            if not text_type:
                missing_values.append("text_type")
            if not id:
                missing_values.append("id")
            # A confidence of 0 is a legitimate value; only None means "missing".
            if confidence is None:
                missing_values.append("confidence")
            # Coordinates may legitimately be 0, so compare against None.
            if xmin is None or ymin is None or xmax is None or ymax is None:
                missing_values.append("xmin ymin xmax or ymax")
            if not page_number:
                missing_values.append("page_number")
            if not doc_width or not doc_height:
                missing_values.append("doc_width or doc_height")
            if missing_values:
                raise ValueError(f"missing: {missing_values}")

            self.text_type = text_type.lower()
            self.text = text.lower()
            self.id = id
            self.confidence: float = confidence
            self.xmin: int = xmin
            self.ymin: int = ymin
            self.xmax: int = xmax
            self.ymax: int = ymax
            self.page_number: int = page_number
            self.doc_width = doc_width
            self.doc_height = doc_height
            self.child_relationships = child_relationships
            # Optional values are only set on the instance when provided.
            if original_text:
                self.original_text = original_text
            if resolver:
                self.resolver = resolver
            if reference:
                self.reference = reference

        if ocrdb_row:
            # Column 0 of the row is not used here.
            self.page_number = ocrdb_row[1]
            self.text_type = ocrdb_row[2]
            self.text = ocrdb_row[3]
            self.original_text = ocrdb_row[4]
            self.confidence = ocrdb_row[5]
            self.xmin = ocrdb_row[6]
            self.ymin = ocrdb_row[7]
            self.xmax = ocrdb_row[8]
            self.ymax = ocrdb_row[9]
            self.id = ocrdb_row[10]
            self.doc_width = ocrdb_row[11]
            self.doc_height = ocrdb_row[12]
            self.child_relationships = ocrdb_row[13]
            self.reference = ocrdb_row[14]

        if trp_word:
            if not (doc_width and doc_height and page_number):
                # Adjacent literals instead of a backslash continuation, so the
                # message does not contain the source indentation.
                raise ValueError(
                    "when using trp_word, need doc_width and doc_height and page_number parameters as well. "
                    f"doc_width: {doc_width}, doc_height: {doc_height}, page_number: {page_number}")

            # NOTE(review): text/text_type/original_text are only set for
            # FieldKey, FieldValue and trpWord inputs. A Line (handled for
            # child_relationships below) leaves them unset -- confirm whether
            # that is intended.
            if isinstance(trp_word, (FieldKey, FieldValue)):
                self.text = trp_word.text.lower()
                self.text_type = 'KEY' if isinstance(trp_word, FieldKey) else 'VALUE'
                self.original_text = trp_word.text
                if reference:
                    self.reference = reference
            if isinstance(trp_word, trpWord):
                self.text = trp_word.text.lower()
                self.text_type = 'word'
                self.original_text = trp_word.text

            self.confidence = trp_word.confidence

            # Scale the bounding box by the document dimensions.
            bounding_box = trp_word.geometry.boundingBox
            self.xmin = round(bounding_box.left * doc_width)
            self.ymin = round(bounding_box.top * doc_height)
            # xmax/ymax are derived from the already-rounded xmin/ymin.
            self.xmax = round(self.xmin + (bounding_box.width * doc_width))
            self.ymax = round(self.ymin + (bounding_box.height * doc_height))

            self.page_number = page_number
            if resolver:
                self.resolver = resolver
            self.id = trp_word.id
            self.doc_width = doc_width
            self.doc_height = doc_height
            # A Line records the ids of its words as a comma-separated string.
            if isinstance(trp_word, Line):
                self.child_relationships = ",".join([word.id for word in trp_word.words])
            else:
                self.child_relationships = ""