in obelics/processors/web_document_filtering.py [0:0]
def __init__(
self,
cond_check_format,
valid_formats,
cond_check_size_image,
original_width_min_cutoff,
original_width_max_cutoff,
original_height_min_cutoff,
original_height_max_cutoff,
rendered_width_min_cutoff,
rendered_width_max_cutoff,
rendered_height_min_cutoff,
rendered_height_max_cutoff,
aspect_ratio_max_cutoff,
cond_remove_non_printing_characters,
non_printing_characters_re,
cond_standardize_whitespace,
cond_check_number_words_node_level,
strip_characters,
number_words_node_level_min_cutoff,
number_words_node_level_max_cutoff,
cond_check_character_repetition_ratio_node_level,
character_repetition_length_node_level,
character_repetition_node_level_max_cutoff,
cond_check_word_repetition_ratio_node_level,
word_repetition_length_node_level,
word_repetition_node_level_max_cutoff,
cond_check_special_character_ratio_node_level,
special_character_ratio_node_level_max_cutoff,
cond_check_stopword_ratio_node_level,
stopwords,
stopword_ratio_node_level_min_cutoff,
cond_check_flagged_word_ratio_node_level,
flagged_words,
flagged_word_ratio_node_level_max_cutoff,
cond_check_punctuation_ratio_node_level,
min_number_words_to_check_punctuation_ratio_node_level,
punctuation,
punctuation_ratio_node_level_min_cutoff,
cond_check_common_word_ratio_node_level,
path_common_words,
common_word_ratio_node_level_min_cutoff,
cond_check_lang_id_node_level,
path_lang_id_model,
lang_id_node_level_min_cutoff,
cond_check_perplexity_score_node_level,
digits_re,
unicode_punctuation,
path_sentencepiece_model,
path_kenlm_model,
perplexity_score_node_level_max_cutoff,