def get_text()

in microservices/extraction_service/src/utils/table_extractor.py [0:0]


  def get_text(el, data):
    """Convert text offset indexes into text snippets.

    Args:
      el: Mapping describing one element. Its optional
        ["textAnchor"]["textSegments"] list holds segments with optional
        "startIndex" / "endIndex" offsets into data["text"]. When the
        extracted text is non-empty, el["confidence"] and
        el["boundingPoly"]["normalizedVertices"] are read as well.
      data: Mapping whose "text" entry is the string being sliced.

    Returns:
      Tuple (text, cell_conf, coordinates), where text is the concatenation
      of all segment slices, cell_conf is el["confidence"] and coordinates
      is the flat list [x0, y0, x1, y1, ...] built from the normalized
      vertices. Returns (None, None, None) when no text could be extracted.

    Raises:
      KeyError: If text was extracted but the element lacks "confidence",
        "boundingPoly"/"normalizedVertices", or a vertex lacks "x"/"y".
    """
    segments = []
    if "textAnchor" in el and "textSegments" in el["textAnchor"]:
      segments = el["textAnchor"]["textSegments"]

    # A missing startIndex or endIndex is treated as 0. The offsets are cast
    # with int() because they are not guaranteed to arrive as integers.
    text = "".join(
        data["text"][int(seg.get("startIndex", 0)):int(seg.get("endIndex", 0))]
        for seg in segments
    )

    # Nothing extracted: skip the metadata lookups entirely so an element
    # without text does not fail on a missing confidence / bounding box.
    if not text:
      return (None, None, None)

    # Confidence and bounding box belong to the element, not to a segment,
    # so they are read once instead of once per segment.
    cell_conf = el["confidence"]
    coordinates = []
    for vertex in el["boundingPoly"]["normalizedVertices"]:
      # NOTE(review): values are stored as-is (no deepcopy); this assumes
      # x / y are plain numbers - confirm against the input schema.
      coordinates.extend((vertex["x"], vertex["y"]))

    return (text, cell_conf, coordinates)