obelics/processors/__init__.py (11 lines of code) (raw):
from obelics.processors.dom_tree_simplificator import DOMTreeSimplificator
from obelics.processors.html_extractor import HtmlExtractor
from obelics.processors.pre_extraction_simplificator import PreExtractionSimplificator
from obelics.processors.warc_downloader import WarcDownloader
from obelics.processors.web_document_extractor import CommonCrawlWebDocumentExtractor
from obelics.processors.web_document_filtering import (
FilteringFunctions,
WebDocumentFilteringDocLevel,
WebDocumentFilteringNodeLevel,
)
from obelics.processors.web_document_line_deduplication import WebDocumentLineDeduplication