in bq-connector/docai_bq_connector/connector/BqDocumentMapper.py [0:0]
def _parse_entities(self, entities) -> DocumentRow:
    """Convert a sequence of entities into a ``DocumentRow``.

    Rules applied to each entity, in order:

    * An entity whose ``page_anchor.page_refs`` does not hold exactly one
      reference is skipped without recording an error.
    * ``mention_text`` that is ``None`` or whitespace-only is stored as
      ``None``; otherwise the text is stored unchanged (not stripped).
    * An entity with no ``properties`` becomes a field of the row. If the
      row already has a field with the same ``type_``, a ``ConversionError``
      is appended to ``self.errors`` and the entity is dropped.
    * An entity with ``properties`` is a parent: the existing field with
      the same ``type_`` is reused (or a new one is created and added),
      and its properties are parsed recursively. The resulting fields are
      merged into the parent's first child row when one exists; otherwise
      the parsed row is attached as the parent's first child.

    Args:
        entities: Iterable of entity objects exposing ``type_``, ``id``,
            ``mention_text``, ``normalized_value``, ``confidence``,
            ``page_anchor`` and ``properties``.

    Returns:
        The populated ``DocumentRow``.
    """

    def build_field(source, text_value):
        # The stored page is the single page ref's ``page`` plus one --
        # presumably zero-based index to one-based number; TODO confirm.
        page_number = source.page_anchor.page_refs[0].page + 1
        return DocumentField(
            source.type_,
            text_value,
            source.normalized_value,
            source.confidence,
            page_number,
        )

    result = DocumentRow()
    for item in entities:
        # Only entities anchored to exactly one page are considered.
        if len(item.page_anchor.page_refs) != 1:
            continue

        raw_text = item.mention_text
        if raw_text is None or raw_text.strip() == "":
            value = None
        else:
            value = raw_text

        is_leaf = len(item.properties) == 0
        existing = result.find_field_by_name(item.type_)

        if is_leaf:
            if existing is None:
                result.fields.append(build_field(item, value))
            else:
                # A leaf type name may appear only once per row.
                duplicate = ConversionError(
                    item.type_,
                    value,
                    "Duplicate field definition",
                    None,
                    ConversionError.error_type_duplicate_field,
                    identifier=item.id,
                )
                self.errors.append(duplicate)
            continue

        # Parent entity: reuse the field already registered under this
        # name, or register a new one.
        parent = existing
        if parent is None:
            parent = build_field(item, value)
            result.fields.append(parent)

        nested = self._parse_entities(item.properties)
        if len(parent.children) == 0:
            parent.children.append(nested)
        else:
            # Repeated parents of the same type share the first child row.
            parent.children[0].fields.extend(nested.fields)

    return result