in microservices/extraction_service/src/utils/utils_functions.py [0:0]
def extract_form_fields(doc_element: dict, document: dict):
    """Extract text, confidence and bounding box of a form-parser element.

    Parameters
    ----------
    doc_element:
        Entity taken from the form parser output. Despite the ``dict``
        annotation it is read through attribute access:
        ``text_anchor.text_segments`` (each segment exposing
        ``start_index`` and ``end_index``), ``confidence`` and
        ``bounding_poly.normalized_vertices`` (each vertex exposing
        ``x`` and ``y``).
    document:
        Object whose ``text`` attribute holds the extracted OCR text that
        the segment offsets index into.

    Returns
    -------
    tuple
        ``(text, confidence, coordinates)`` where ``text`` is the
        concatenation of all referenced text slices, ``confidence`` is
        ``doc_element.confidence`` unchanged, and ``coordinates`` is a flat
        list ``[x0, y0, x1, y1, ...]`` of floats rounded to 4 decimals.
    """
    # If a text segment spans several lines, it will be stored in
    # different text segments, so the slices are concatenated in order.
    # The offsets are passed through int() because they may not arrive as
    # ints (assumption: string-encoded in the raw JSON -- TODO confirm).
    response = "".join(
        document.text[int(segment.start_index):int(segment.end_index)]
        for segment in doc_element.text_anchor.text_segments
    )
    confidence = doc_element.confidence

    list_of_coordinates = []
    for vertex in doc_element.bounding_poly.normalized_vertices:
        list_of_coordinates.extend(
            (float(round(vertex.x, 4)), float(round(vertex.y, 4)))
        )
    return response, confidence, list_of_coordinates