def table_attributes()

in microservices/extraction_service/src/utils/table_extractor.py [0:0]


  def table_attributes(self):
    """
    Collect header and body-row data for every table on every page.

    Reads ``self.data["pages"]`` and, for each page that has a "tables"
    entry, stores a per-page dict in ``self.master_dict[page_index]``.
    That dict maps each table index to a table dict and also carries the
    page "height" and "width" (copied from ``page["dimension"]``) once at
    least one table has been recorded.

    Each table dict contains:
      * "headers": list of ``(text, confidence, coordinates)`` tuples, with
        runs of whitespace in the text collapsed to single spaces,
      * "page_num": index of the page within ``self.data["pages"]``,
      * one integer key per body row, mapping to ``{"rows": {col_idx: cell}}``
        where ``cell`` holds "value", "extraction_confidence",
        "value_coordinates", "manual_extraction" and "corrected_value".

    Tables lacking "bodyRows"/"headerRows", or whose "headerRows" list is
    empty, are skipped. When a table has several header rows, the data
    built from the last one is what gets stored.

    Returns:
      None normally (results are written to ``self.master_dict``), or the
      string "Table Empty !!!" if unpacking a body cell raises ValueError.
      None is also returned, after logging an error, when ``self.data``
      has no "pages" key.
    """
    if "pages" not in self.data:
      Logger.error("no data found in table")
      return None

    for pg_num, page in enumerate(self.data["pages"]):
      if "tables" not in page:
        continue

      page_data = {}
      for table_num, table in enumerate(page["tables"]):
        if "bodyRows" not in table or "headerRows" not in table:
          continue

        # Reset per table so that an empty "headerRows" list can neither
        # hit an unbound name nor reuse the previous table's data.
        table_data = None

        for hrow in table["headerRows"]:
          # get_text is unpacked below as a 3-tuple:
          # (text, confidence, coordinates).
          header_row = [
            TableExtractor.get_text(cell["layout"], self.data)
            for cell in hrow["cells"]
          ]
          # Collapse whitespace in header text; keep None headers as-is.
          columns = [
            (val if val is None else " ".join(val.split()), conf, cord)
            for val, conf, cord in header_row
          ]
          table_data = {"headers": columns, "page_num": pg_num}

          # col_data deliberately lives across rows (as in the original
          # implementation); each row stores its own deep copy.
          col_data = {}
          try:
            for row_num, row in enumerate(table["bodyRows"]):
              row_data = [
                TableExtractor.get_text(cell["layout"], self.data)
                for cell in row["cells"]
              ]
              # NOTE(review): a body row with fewer cells than the header
              # raises IndexError here, which is not handled - confirm
              # whether such rows can occur in the input.
              for i_col in range(len(header_row)):
                entity_val, conf, coordinates = row_data[i_col]
                col_data[i_col] = {
                  "value": entity_val,
                  "extraction_confidence": conf,
                  "value_coordinates": coordinates,
                  "manual_extraction": False,
                  "corrected_value": None
                }
              table_data[row_num] = {"rows": deepcopy(col_data)}
          except ValueError as e:
            Logger.error(e)
            return "Table Empty !!!"

        if table_data is None:
          # "headerRows" was present but empty: nothing to record.
          continue

        page_data[table_num] = table_data
        page_data["height"] = page["dimension"]["height"]
        page_data["width"] = page["dimension"]["width"]

      self.master_dict[pg_num] = page_data

    return None