def add_kv_ocr_confidence()

in src-python/trp/t_pipeline.py [0:0]


def add_kv_ocr_confidence(t_document: t2.TDocument) -> t2.TDocument:
    """
    Add an OCR confidence summary to the form blocks of every page.

    adds custom attribute to each KEY_VALUE_SET in the form of
    "Custom":{"OCRConfidence": {'mean': 98.2, 'min': 95.1}}
    If no CHILD relationships exist for a KEY or VALUE, no confidence score will be added.

    The blocks processed are whatever ``t_document.forms(page=...)`` returns for each
    entry of ``t_document.pages``; the values aggregated are the ``confidence``
    attributes of the blocks returned by ``t_document.get_child_relations``.
    Child blocks whose confidence is ``None`` are ignored; a confidence of 0 is a
    real value and is included in both the mean and the minimum.

    An existing non-empty ``custom`` mapping on a block is updated in place (other keys
    are kept, a previous 'OCRConfidence' entry is overwritten); otherwise a new mapping
    is assigned.

    Args:
        t_document: document whose form blocks are annotated in place.

    Returns:
        The same ``t_document`` object that was passed in.
    """
    for idx, page_block in enumerate(t_document.pages):
        logger.debug("page: %s", idx)
        key_value_blocks = t_document.forms(page=page_block)
        logger.debug("len(key_value_blocks): %s", len(key_value_blocks))
        for key_value_block in key_value_blocks:
            logger.debug("key_value_block.id: %s", key_value_block.id)
            ocr_blocks = t_document.get_child_relations(key_value_block)
            if not ocr_blocks:
                # No child relations -> nothing to aggregate for this block.
                continue
            logger.debug("len(child-relations: %s", len(ocr_blocks))
            # Compare against None rather than relying on truthiness so that a
            # confidence of 0.0 is not silently discarded from the statistics.
            confidence_list: List[float] = [
                float(x.confidence) for x in ocr_blocks if x.confidence is not None
            ]
            if not confidence_list:
                # statistics.mean() and min() both reject empty input.
                continue
            ocr_confidence = {
                'mean': statistics.mean(confidence_list),
                'min': min(confidence_list),
            }
            if key_value_block.custom:
                key_value_block.custom['OCRConfidence'] = ocr_confidence
            else:
                key_value_block.custom = {'OCRConfidence': ocr_confidence}
    return t_document