obelics/processors/__init__.py (11 lines of code) (raw):

# Package-level re-exports: each name below is imported into the
# `obelics.processors` namespace so callers can write
# `from obelics.processors import <Name>` instead of reaching into the
# individual submodules. The names are not used in this file itself.
from obelics.processors.dom_tree_simplificator import DOMTreeSimplificator
from obelics.processors.html_extractor import HtmlExtractor
from obelics.processors.pre_extraction_simplificator import PreExtractionSimplificator
from obelics.processors.warc_downloader import WarcDownloader
from obelics.processors.web_document_extractor import CommonCrawlWebDocumentExtractor
from obelics.processors.web_document_filtering import (
    FilteringFunctions,
    WebDocumentFilteringDocLevel,
    WebDocumentFilteringNodeLevel,
)
from obelics.processors.web_document_line_deduplication import WebDocumentLineDeduplication