in preprocess/htmltable.py [0:0]
def __init__(self, table, normalization=0, remove_hidden=True, first_row_as_caption=True):
    """Create table from a BeautifulSoup table Tag.

    Args:
        table: Tag whose ``name`` is ``'table'``.
        normalization: ``HtmlTable.NORM_CORNER`` triggers
            ``normalize_table()``; ``HtmlTable.NORM_DUPLICATE`` triggers
            ``normalize_table(deep=True)``; any other value skips
            normalization.
        remove_hidden: When true, ``self.remove_hidden()`` is called before
            normalization.
        first_row_as_caption: When true and the table has at least two
            direct ``<tr>`` children, a first row whose first cell has a
            ``colspan`` equal to the widest row's cell count is used as the
            caption and removed from the table.

    Side effects:
        May mutate ``table`` in place (the caption row is decomposed), and
        finishes by calling ``self.get_cells()``.
    """
    assert table.name == 'table'
    self.table = table

    # Start from the explicit <caption> element, if the table has one.
    self.caption = None if table.caption is None else table.caption.text
    if self.caption:
        self.caption = clean_cell_value(self.caption)

    if first_row_as_caption:
        rows = self.table.find_all('tr', recursive=False)
        # The column count is only needed when there are at least two rows.
        # Computing it inside this guard also avoids calling max() on an
        # empty sequence (ValueError) for tables without direct <tr> children.
        if len(rows) >= 2:
            col_num = max(
                len(row.find_all(['th', 'td'], recursive=False)) for row in rows
            )
            row_1_cells = rows[0].find_all(['th', 'td'], recursive=False)
            if row_1_cells:
                row_1_cell = row_1_cells[0]
                # A first cell spanning every column is treated as a title
                # row: it replaces any caption found above and is dropped
                # from the table body.
                if self.get_int(row_1_cell, 'colspan') == col_num:
                    self.caption = clean_cell_value(row_1_cell.text)
                    rows[0].decompose()

    if self.caption:
        # Strip trailing bracketed numeric markers such as "[1]" or "[1][2]".
        self.caption = re.sub(r'(\[\d+\])+$', '', self.caption)

    if remove_hidden:
        self.remove_hidden()

    if normalization == HtmlTable.NORM_CORNER:
        self.normalize_table()
    elif normalization == HtmlTable.NORM_DUPLICATE:
        self.normalize_table(deep=True)

    self.get_cells()