in dpr/data/tables.py [0:0]
def parse(self) -> List[Table]:
    """Scan the token stream once, dispatching each token to its handler.

    Resets ``self.all_tables`` to an empty list and ``self.tables_stack`` to
    an empty deque, then visits the first ``self.max_idx`` entries of
    ``self.tokens`` in order. A token whose ``self.is_html_mask`` entry is
    falsy is passed to ``_on_content``; a flagged token is matched against
    the table / row / cell tag patterns below, and flagged tokens matching
    none of them are skipped.

    Returns:
        ``self.all_tables`` as it stands after the scan (presumably
        populated by the handlers, which are defined outside this block).
    """
    self.all_tables = []
    self.tables_stack = collections.deque()

    for position in range(self.max_idx):
        token = self.tokens[position]

        if self.is_html_mask[position]:
            # Markup token: opening tags are recognised by substring,
            # closing tags by exact equality.
            if "<Table" in token:
                self._on_table_start()
            elif token == "</Table>":
                self._on_table_end()
            elif "<Tr" in token:
                self._onRowStart()
            elif token == "</Tr>":
                self._onRowEnd()
            elif "<Td" in token or "<Th" in token:
                self._onCellStart()
            elif token in ("</Td>", "</Th>"):
                self._on_cell_end()
        else:
            # cell content
            self._on_content(token)

    return self.all_tables