dpr/data/biencoder_data.py [374:398]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            title = jline["title"]
            # Parse the token/mask pair for this title into table objects.
            p = NQTableParser(tokens, mask, title)
            tables = p.parse()

            # table = parse_table(tokens, mask)

            # Every table after the first one returned by parse() is counted as nested.
            # NOTE(review): presumably parse() puts the top-level table first -- confirm.
            nested_tables += len(tables[1:])

            for t in tables:
                total_tables += 1

                # Count the body rows that have cells and at least one cell whose
                # value_tokens is non-empty.
                non_empty_rows = sum([1 for r in t.body if r.cells and any([True for c in r.cells if c.value_tokens])])

                # Tables with at most one non-empty row are only counted, never stored.
                if non_empty_rows <= 1:
                    single_row_tables += 1
                else:
                    regular_tables += 1
                    # Adds every body row, including rows that were not counted as
                    # non-empty above.
                    total_rows += len(t.body)

                    # Keep only the first table seen for a given key; later tables
                    # with the same key are dropped.
                    if t.get_key() not in tables_dict:
                        tables_dict[t.get_key()] = t

            # Progress log. This check sits outside the per-table loop, so it fires
            # whenever the dict size is a multiple of 1000 at this point -- including
            # 0, and repeatedly while the size stays unchanged on such a value.
            # NOTE(review): the same parsing/statistics logic also appears in
            # dpr/data/tables.py -- consider sharing one implementation.
            if len(tables_dict) % 1000 == 0:
                logger.info("tables_dict %d", len(tables_dict))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dpr/data/tables.py [203:233]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            title = jline["title"]
            # logger.info('Table from page %s', title)
            # logger.info('tokens len %s', len(tokens))
            # logger.info('tokens %s', tokens)
            # logger.info('page_url %s', page_url)
            # Parse the token/mask pair for this title into table objects.
            p = NQTableParser(tokens, mask, title)
            tables = p.parse()

            # logger.info('parsed tables %d', len(tables))

            # table = parse_table(tokens, mask)
            # Every table after the first one returned by parse() is counted as nested.
            # NOTE(review): presumably parse() puts the top-level table first -- confirm.
            nested_tables += len(tables[1:])

            for t in tables:
                # logger.info('Table: %s', t)
                total_tables += 1

                # Count the body rows that have cells and at least one cell whose
                # value_tokens is non-empty.
                non_empty_rows = sum([1 for r in t.body if r.cells and any([True for c in r.cells if c.value_tokens])])

                # Tables with at most one non-empty row are only counted, never stored.
                if non_empty_rows <= 1:
                    single_row_tables += 1
                else:
                    regular_tables += 1
                    # Adds every body row, including rows that were not counted as
                    # non-empty above.
                    total_rows += len(t.body)

                    # Keep only the first table seen for a given key; later tables
                    # with the same key are dropped.
                    if t.get_key() not in tables_dict:
                        tables_dict[t.get_key()] = t

            # Progress log. This check sits outside the per-table loop, so it fires
            # whenever the dict size is a multiple of 1000 at this point -- including
            # 0, and repeatedly while the size stays unchanged on such a value.
            # NOTE(review): the same parsing/statistics logic also appears in
            # dpr/data/biencoder_data.py -- consider sharing one implementation.
            if len(tables_dict) % 1000 == 0:
                logger.info("tables_dict %d", len(tables_dict))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



