in microservices/extraction_service/src/utils/table_extractor.py [0:0]
def get_text(el, data):
    """Convert text offset indexes into text snippets.

    The text of ``el`` is assembled by slicing ``data["text"]`` with every
    segment listed under ``el["textAnchor"]["textSegments"]`` and joining
    the slices in order.

    Args:
        el: Layout element dict. Keys read: ``"textAnchor"`` ->
            ``"textSegments"`` (each segment may carry ``"startIndex"``
            and/or ``"endIndex"``), ``"confidence"`` and
            ``"boundingPoly"`` -> ``"normalizedVertices"``.
        data: Dict whose ``"text"`` entry is the string the segment
            indexes point into.

    Returns:
        A tuple ``(text, confidence, coordinates)`` where ``coordinates``
        is the flat list ``[x0, y0, x1, y1, ...]`` built from the
        vertices. When the resulting text is empty, every member of the
        tuple is ``None``.

    Raises:
        KeyError: If the text is non-empty but ``el`` lacks
            ``"confidence"`` or ``"boundingPoly"``/``"normalizedVertices"``.
    """
    segments = el.get("textAnchor", {}).get("textSegments", [])

    # A missing startIndex/endIndex is treated as 0. Indexes are passed
    # through int() because they are not guaranteed to arrive as ints.
    text = "".join(
        data["text"][
            int(segment.get("startIndex", 0)):int(segment.get("endIndex", 0))
        ]
        for segment in segments
    )

    # Nothing extracted: confidence and coordinates are meaningless, so do
    # not even require them to be present on the element.
    if not text:
        return (None, None, None)

    cell_conf = el["confidence"]

    coordinates = []
    for vertex in el["boundingPoly"]["normalizedVertices"]:
        # Same rule as for the segment indexes: an absent coordinate is
        # read as 0 instead of raising KeyError.
        # NOTE(review): presumably zero-valued fields are omitted by the
        # upstream JSON serializer - confirm against real payloads.
        coordinates.append(deepcopy(vertex.get("x", 0.0)))
        coordinates.append(deepcopy(vertex.get("y", 0.0)))

    return (text, cell_conf, coordinates)