Path Lines of Code obelics/__init__.py 1 obelics/callers/__init__.py 1 obelics/callers/download_warc.py 63 obelics/callers/extract_html.py 53 obelics/callers/extract_web_documents.py 176 obelics/callers/filter_web_documents.py 196 obelics/callers/line_deduplicate_web_documents.py 65 obelics/configs/config_extract_web_documents.yaml 23 obelics/configs/config_filter_web_documents.yaml 64 obelics/processors/__init__.py 11 obelics/processors/dom_tree_simplificator.py 195 obelics/processors/html_extractor.py 52 obelics/processors/pre_extraction_simplificator.py 152 obelics/processors/warc_downloader.py 36 obelics/processors/web_document_extractor.py 371 obelics/processors/web_document_filtering.py 1002 obelics/processors/web_document_line_deduplication.py 121 obelics/utils/__init__.py 24 obelics/utils/filtering_utils.py 427 obelics/utils/simplification_utils.py 176 obelics/utils/tags_attributes.py 82 obelics/utils/utils.py 4 obelics/visualization/__init__.py 1 obelics/visualization/assets/DOM_tree_viz.html 172 obelics/visualization/choose_filtering_parameters_web_documents_node_level.py 205 obelics/visualization/global_visualization.py 321 obelics/visualization/web_document_and_filtering_visualization.py 675 obelics/visualization/web_document_visualization.py 59