def summary_counts_for_document()

in web-app-pix2info-python/src/backend/docai.py [0:0]


def summary_counts_for_document(document: Document) -> Mapping[str, Any]:
    """Return a document summary (for direct use by the frontend).

    Each per-element entry of the result is a list whose first item is the
    sum over all pages and whose remaining items are the per-page counts, in
    page order.

    Args:
        document: Document whose ``pages`` and ``entities`` are summarized.

    Returns:
        A mapping with the keys ``pages`` (number of pages), ``languages``
        (number of distinct detected language codes) and, as
        ``[total, page_0, page_1, ...]`` lists: ``blocks``, ``paragraphs``,
        ``lines``, ``tokens``, ``fields``, ``tables``, ``entities`` and
        ``barcodes``. An entity anchored on several pages is counted on each
        of them, so the ``entities`` total is the sum of the per-page counts.
    """

    def total_entities(
        entity: Document.Entity,
        page_ref: Document.PageAnchor.PageRef | None = None,
    ) -> int:
        """Count the entities that will render in the subtree of ``entity``."""
        will_render, page_ref = will_render_entity(entity, page_ref)
        total = 1 if will_render else 0
        # The page ref resolved for the parent is passed down to its properties.
        total += sum(total_entities(child, page_ref) for child in entity.properties)
        return total

    def count_list(int_list: list[int]) -> list[int]:
        """Prefix the per-page counts with their sum."""
        return [sum(int_list), *int_list]

    # OCR info in document.pages
    pages = len(document.pages)
    language_codes: set[str] = set()
    block_counts: list[int] = []
    paragraph_counts: list[int] = []
    line_counts: list[int] = []
    token_counts: list[int] = []
    field_counts: list[int] = []
    table_counts: list[int] = []
    barcode_counts: list[int] = []
    for page in document.pages:
        language_codes.update(dl.language_code for dl in page.detected_languages)
        block_counts.append(len(page.blocks))
        paragraph_counts.append(len(page.paragraphs))
        line_counts.append(len(page.lines))
        token_counts.append(len(page.tokens))
        field_counts.append(len(page.form_fields))
        table_counts.append(len(page.tables))
        barcode_counts.append(len(page.detected_barcodes))

    # For entities, page info is at the entity level
    entity_counts = [0] * pages
    for entity in document.entities:
        # Ignore page refs that do not match a page of this document: indexing
        # with them would raise IndexError (or, for a negative value, silently
        # credit the wrong page).
        page_indexes = [
            page_ref.page
            for page_ref in entity.page_anchor.page_refs
            if 0 <= page_ref.page < pages
        ]
        if not page_indexes:
            continue
        # The recursive count does not depend on the page: compute it once per
        # entity instead of once per page ref.
        subtotal = total_entities(entity)
        for page_index in page_indexes:
            entity_counts[page_index] += subtotal

    return dict(
        pages=pages,
        languages=len(language_codes),
        blocks=count_list(block_counts),
        paragraphs=count_list(paragraph_counts),
        lines=count_list(line_counts),
        tokens=count_list(token_counts),
        fields=count_list(field_counts),
        tables=count_list(table_counts),
        entities=count_list(entity_counts),
        barcodes=count_list(barcode_counts),
    )