in web-app-pix2info-python/src/backend/docai.py [0:0]
def summary_counts_for_document(document: Document) -> Mapping[str, Any]:
    """Summarize how many items of each kind the document holds (frontend-ready).

    ``pages`` and ``languages`` are plain integers. Every other value is a
    list holding the document-wide total first, then one count per page.
    """

    def total_entities(
        entity: Document.Entity,
        page_ref: Document.PageAnchor.PageRef | None = None,
    ) -> int:
        """Count ``entity`` if it will be rendered, plus its nested properties."""
        rendered, page_ref = will_render_entity(entity, page_ref)
        own_count = 1 if rendered else 0
        # The page reference returned by will_render_entity is the one
        # forwarded to the nested properties.
        nested_count = sum(
            total_entities(child, page_ref) for child in entity.properties
        )
        return own_count + nested_count

    def count_list(int_list: list[int]) -> list:
        """Prefix the per-page counts with their sum."""
        return [sum(int_list), *int_list]

    # OCR info lives in document.pages: summary key -> page attribute to count.
    page_attributes = {
        "blocks": "blocks",
        "paragraphs": "paragraphs",
        "lines": "lines",
        "tokens": "tokens",
        "fields": "form_fields",
        "tables": "tables",
        "barcodes": "detected_barcodes",
    }
    per_page: dict[str, list[int]] = {key: [] for key in page_attributes}
    language_codes: set[str] = set()
    for page in document.pages:
        for detected in page.detected_languages:
            language_codes.add(detected.language_code)
        for key, attribute in page_attributes.items():
            per_page[key].append(len(getattr(page, attribute)))
    pages = len(per_page["blocks"])

    # Entities carry their own page references; an entity contributes to the
    # count of every page it references.
    entity_counts = [0] * pages
    for entity in document.entities:
        for page_ref in entity.page_anchor.page_refs:
            entity_counts[page_ref.page] += total_entities(entity)

    return {
        "pages": pages,
        "languages": len(language_codes),
        "blocks": count_list(per_page["blocks"]),
        "paragraphs": count_list(per_page["paragraphs"]),
        "lines": count_list(per_page["lines"]),
        "tokens": count_list(per_page["tokens"]),
        "fields": count_list(per_page["fields"]),
        "tables": count_list(per_page["tables"]),
        "entities": count_list(entity_counts),
        "barcodes": count_list(per_page["barcodes"]),
    }