def add_page_dimensions()

in tpipelinepagedimensions/textractpagedimensions/t_pagedimensions.py [0:0]


def add_page_dimensions(t_document: t2.TDocument, input_document: Union[str, bytes]) -> t2.TDocument:
    """
    Add page dimensions to each page of the document as a custom property on the page Block,
    e. g. {'PageDimension': {'doc_width': 1549.0, 'doc_height': 370.0} }

    Args:
        t_document: document whose ``pages`` blocks receive the 'PageDimension' entry.
            The blocks are modified in place.
        input_document: where to read the dimensions from:
            * a ``str`` starting with ``s3://`` (matched case-insensitively) is split into
              bucket and key and passed to ``get_width_height_from_s3_object``;
            * any other ``str`` is passed to ``get_width_height_from_file`` as ``filepath``;
            * ``bytes`` / ``bytearray`` are wrapped in ``io.BytesIO`` and passed to
              ``get_size_from_filestream`` with ``ext=None``.
            Any other type yields no dimensions at all.

    Returns:
        The same ``t_document`` instance that was passed in.

    Raises:
        AssertionError: if the number of pages in ``t_document`` differs from the number
            of dimensions obtained from ``input_document``.
        ValueError: if an ``s3://`` location contains no ``/`` separating bucket and key.
    """
    s3_scheme = "s3://"
    page_dimensions: List[DocumentDimensions] = list()

    if isinstance(input_document, str):
        if len(input_document) > 7 and input_document.lower().startswith(s3_scheme):
            # Slice the scheme off by length instead of str.replace(): the check above is
            # case-insensitive (so "S3://..." must be stripped too), and replace() would
            # also remove any later "s3://" occurrence inside the object key.
            s3_location = input_document[len(s3_scheme):]
            s3_bucket, s3_key = s3_location.split("/", 1)
            page_dimensions = get_width_height_from_s3_object(s3_bucket=s3_bucket, s3_key=s3_key)
        else:
            page_dimensions = get_width_height_from_file(filepath=input_document)

    elif isinstance(input_document, (bytes, bytearray)):
        page_dimensions = get_size_from_filestream(io.BytesIO(input_document), ext=None)

    if len(t_document.pages) != len(page_dimensions):
        raise AssertionError(
            f"number of pages in document did not match number of dimensions received: document-pages: {len(t_document.pages)}, dimension-pages: {len(page_dimensions)}"
        )

    for idx, block in enumerate(t_document.pages):
        # Use the block's own page number (used as a 1-based index) when it is set;
        # otherwise fall back to the block's position in t_document.pages.
        # Original note: bytes input does not return a page for the Block, so the
        # page-number mapping cannot be used in that case.
        dimension = page_dimensions[block.page - 1] if block.page else page_dimensions[idx]
        if block.custom:
            # Keep any custom properties that are already present.
            block.custom['PageDimension'] = asdict(dimension)
        else:
            block.custom = {'PageDimension': asdict(dimension)}

    return t_document