id: 1 unit: def filtering() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 526 LOC McCabe index: 3 number of parameters: 1 id: 2 unit: def extraction_mode() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 170 LOC McCabe index: 28 number of parameters: 1 id: 3 unit: def __call__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 158 LOC McCabe index: 47 number of parameters: 2 id: 4 unit: def __call__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 93 LOC McCabe index: 27 number of parameters: 2 id: 5 unit: def get_args() file: obelics/callers/extract_web_documents.py start line: 0 end line: 0 size: 91 LOC McCabe index: 2 number of parameters: 0 id: 6 unit: def get_args() file: build_obelics/03_dl_images_create_dataset.py start line: 0 end line: 0 size: 84 LOC McCabe index: 1 number of parameters: 0 id: 7 unit: def get_args() file: build_obelics/05_filtering_web_docs.py start line: 0 end line: 0 size: 57 LOC McCabe index: 1 number of parameters: 0 id: 8 unit: def __reduce__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 55 LOC McCabe index: 1 number of parameters: 1 id: 9 unit: def __init__() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 54 LOC McCabe index: 3 number of parameters: 3 id: 10 unit: def get_args() file: obelics/callers/filter_web_documents.py start line: 0 end line: 0 size: 52 LOC McCabe index: 1 number of parameters: 0 id: 11 unit: def __init__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 51 LOC McCabe index: 1 number of parameters: 0 id: 12 unit: def urls_to_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 50 LOC McCabe index: 17 number of parameters: 5 id: 13 unit: def get_media_src() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 49 LOC McCabe index: 29 number of parameters: 1 id: 14 unit: def _merge_consecutive_text_nodes() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 48 LOC McCabe index: 20 number of parameters: 2 id: 15 unit: def get_args() file: obelics/callers/line_deduplicate_web_documents.py start line: 0 end line: 0 size: 43 LOC McCabe index: 1 number of parameters: 0 id: 16 unit: def func_html_to_web_documents() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 43 LOC McCabe index: 7 number of parameters: 1 id: 17 unit: def __reduce__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 43 LOC McCabe index: 1 number of parameters: 1 id: 18 unit: def get_args() file: build_obelics/04_merge_web_docs_with_images.py start line: 0 end line: 0 size: 41 LOC McCabe index: 1 number of parameters: 0 id: 19 unit: def line_deduplicate_web_documents() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 39 LOC McCabe index: 13 number of parameters: 1 id: 20 unit: def __init__() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 39 LOC McCabe index: 1 number of parameters: 0 id: 21 unit: def simplify_media_node() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 38 LOC McCabe index: 17 number of parameters: 2 id: 22 unit: def simplification_mode() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 38 LOC McCabe index: 2 number of parameters: 1 id: 23 unit: def get_args() file: build_obelics/02_extract_html_get_image_urls.py start line: 0 end line: 0 size: 35 LOC McCabe index: 1 number of parameters: 0 id: 24 unit: def func_map_final_cleaning_node_level() file: build_obelics/10_final_cleaning.py start line: 0 end line: 0 size: 34 LOC McCabe index: 9 number of parameters: 1 id: 25 unit: def get_args() file: build_obelics/06_03_remove_image_duplicates.py start line: 0 end line: 0 size: 33 LOC McCabe index: 1 number of parameters: 0 id: 26 unit: def get_html_from_warc() file: obelics/processors/html_extractor.py start line: 0 end line: 0 size: 33 LOC McCabe index: 12 number of parameters: 2 id: 27 unit: def process_one_tar() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 31 LOC McCabe index: 8 number of parameters: 1 id: 28 unit: def get_args() file: build_obelics/02_bis_extract_html_get_image_urls_new_rules.py start line: 0 end line: 0 size: 29 LOC McCabe index: 1 number of parameters: 0 id: 29 unit: def final_cleaning_node_level() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 28 LOC McCabe index: 8 number of parameters: 3 id: 30 unit: def get_args() file: build_obelics/01_download_warc.py start line: 0 end line: 0 size: 27 LOC McCabe index: 1 number of parameters: 0 id: 31 unit: def get_domain_to_duplicated_texts() file: build_obelics/09_04_get_domain_to_duplicated_texts.py start line: 0 end line: 0 size: 27 LOC McCabe index: 17 number of parameters: 1 id: 32 unit: def get_args() file: build_obelics/06_01_create_set_image_urls_in_webdocs.py start line: 0 end line: 0 size: 27 LOC McCabe index: 1 number of parameters: 0 id: 33 unit: def make_tree() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 27 LOC McCabe index: 4 number of parameters: 4 id: 34 unit: def display_document() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 26 LOC McCabe index: 8 number of parameters: 1 id: 35 unit: def get_domain_to_positions() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 25 LOC McCabe index: 10 number of parameters: 1 id: 36 unit: def _remove_empty_leaves() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 23 LOC McCabe index: 12 number of parameters: 2 id: 37 unit: def get_domain_to_duplicated_texts() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 23 LOC McCabe index: 12 number of parameters: 1 id: 38 unit: def urls_to_images() file: build_obelics/04_merge_web_docs_with_images.py start line: 0 end line: 0 size: 22 LOC McCabe index: 11 number of parameters: 5 id: 39 unit: def merge_consecutive_END_OF_DOCUMENT_TOKEN_TO_BE_REPLACED() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 22 LOC McCabe index: 9 number of parameters: 3 id: 40 unit: def get_args() file: obelics/callers/extract_html.py start line: 0 end line: 0 size: 22 LOC McCabe index: 1 number of parameters: 0 id: 41 unit: def get_args() file: obelics/callers/download_warc.py start line: 0 end line: 0 size: 22 LOC McCabe index: 1 number of parameters: 0 id: 42 unit: def urls_to_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 21 LOC McCabe index: 3 number of parameters: 2 id: 43 unit: def choose_document() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 21 LOC McCabe index: 3 number of parameters: 1 id: 44 unit: def __call__() file: build_obelics/06_03_remove_image_duplicates.py start line: 0 end line: 0 size: 20 LOC McCabe index: 11 number of parameters: 2 id: 45 unit: def func_map_final_processing_node_level() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 19 LOC McCabe index: 1 number of parameters: 1 id: 46 unit: def format_filename() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 19 LOC McCabe index: 4 number of parameters: 1 id: 47 unit: def _strip_special_divs() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 19 LOC McCabe index: 12 number of parameters: 2 id: 48 unit: def __init__() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 19 LOC McCabe index: 1 number of parameters: 0 id: 49 unit: def get_exs_and_stats() file: obelics/visualization/choose_filtering_parameters_web_documents_node_level.py start line: 0 end line: 0 size: 19 LOC McCabe index: 9 number of parameters: 4 id: 50 unit: def save_split_sharded_already_splitted_dataset() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 18 LOC McCabe index: 2 number of parameters: 3 id: 51 unit: def standardize_whitespace() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 0 id: 52 unit: def strip() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 17 LOC McCabe index: 6 number of parameters: 2 id: 53 unit: def compute_character_repetition_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 17 LOC McCabe index: 5 number of parameters: 2 id: 54 unit: def load_dataset() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 17 LOC McCabe index: 7 number of parameters: 1 id: 55 unit: def _format_texts() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 16 LOC McCabe index: 9 number of parameters: 2 id: 56 unit: def save_split_sharded_dataset() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 16 LOC McCabe index: 3 number of parameters: 3 id: 57 unit: def display_document() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 16 LOC McCabe index: 4 number of parameters: 1 id: 58 unit: def get_domain_to_positions() file: build_obelics/09_02_get_domain_to_positions.py start line: 0 end line: 0 size: 15 LOC McCabe index: 6 number of parameters: 0 id: 59 unit: def remove_duplicated_images() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 15 LOC McCabe index: 11 number of parameters: 3 id: 60 unit: def format_image_size() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 15 LOC McCabe index: 2 number of parameters: 1 id: 61 unit: def __call__() file: obelics/processors/warc_downloader.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 2 id: 62 unit: def __init__() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 15 LOC McCabe index: 1 number of parameters: 0 id: 63 unit: def _unnest_nodes() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 15 LOC McCabe index: 7 number of parameters: 2 id: 64 unit: def choose_document() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 15 LOC McCabe index: 2 number of parameters: 1 id: 65 unit: def select_mode() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 15 LOC McCabe index: 5 number of parameters: 1 id: 66 unit: def choose_example() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 15 LOC McCabe index: 2 number of parameters: 1 id: 67 unit: def __call__() file: build_obelics/09_06_line_dedup.py start line: 0 end line: 0 size: 14 LOC McCabe index: 6 number of parameters: 2 id: 68 unit: def __call__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 14 LOC McCabe index: 2 number of parameters: 2 id: 69 unit: def _remove_dates() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 14 LOC McCabe index: 10 number of parameters: 2 id: 70 unit: def compute_word_repetition_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 14 LOC McCabe index: 5 number of parameters: 3 id: 71 unit: def remove_spam_paragraphs() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 13 LOC McCabe index: 5 number of parameters: 3 id: 72 unit: def _remove_nodes_matching_css_rules() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 13 LOC McCabe index: 6 number of parameters: 2 id: 73 unit: def save_split_sharded_dataset() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 13 LOC McCabe index: 3 number of parameters: 2 id: 74 unit: def compute_punctuation_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 13 LOC McCabe index: 4 number of parameters: 3 id: 75 unit: def process_image() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 2 id: 76 unit: def __call__() file: obelics/processors/html_extractor.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 2 id: 77 unit: def format_relative_to_absolute_path() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 11 LOC McCabe index: 4 number of parameters: 2 id: 78 unit: def check_size_image() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 79 unit: def normalization() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 80 unit: def __call__() file: build_obelics/08_02_urldedup.py start line: 0 end line: 0 size: 10 LOC McCabe index: 6 number of parameters: 2 id: 81 unit: def __call__() file: build_obelics/12_02_remove_opt_out_images.py start line: 0 end line: 0 size: 10 LOC McCabe index: 11 number of parameters: 2 id: 82 unit: def remove_end_END_OF_DOCUMENT_TOKEN_TO_BE_REPLACED() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 10 LOC McCabe index: 5 number of parameters: 3 id: 83 unit: def get_warc_from_metadata() file: obelics/processors/warc_downloader.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 5 id: 84 unit: def __call__() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 10 LOC McCabe index: 4 number of parameters: 3 id: 85 unit: def get_image_urls() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 10 LOC McCabe index: 6 number of parameters: 3 id: 86 unit: def download_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 1 id: 87 unit: def get_words_from_text() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 10 LOC McCabe index: 7 number of parameters: 4 id: 88 unit: def _strip_html_tree() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 89 unit: def load_dataset() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 1 id: 90 unit: def visualization() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 1 id: 91 unit: def compute_spam_word_ratio() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 1 id: 92 unit: def is_url_valid() file: obelics/utils/simplification_utils.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 93 unit: def traverse() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 94 unit: def download_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 95 unit: def create_dataset_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 1 id: 96 unit: def __init__() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 97 unit: def check_number_words() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 4 id: 98 unit: def compute_stopword_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 3 id: 99 unit: def compute_flagged_word_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 3 id: 100 unit: def compute_common_word_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 3 id: 101 unit: def check_perplexity_score() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 102 unit: def __init__() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 103 unit: def get_dom_viz_html() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 2 id: 104 unit: def func_filter_final_cleaning_doc_level() file: build_obelics/10_final_cleaning.py start line: 0 end line: 0 size: 7 LOC McCabe index: 7 number of parameters: 1 id: 105 unit: def __call__() file: build_obelics/11_03_set_img_urls_dedup.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 2 id: 106 unit: def transform_img() file: build_obelics/07_02_nsfw_image_visualization.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 1 id: 107 unit: def compute_nsfw_scores() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 3 id: 108 unit: def final_cleaning_doc_level() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 7 LOC McCabe index: 7 number of parameters: 1 id: 109 unit: def _only_text_image_nodes() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 7 LOC McCabe index: 6 number of parameters: 2 id: 110 unit: def html_to_web_documents() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 111 unit: def html_to_web_documents() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 1 id: 112 unit: def check_special_character_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 3 id: 113 unit: def check_stopword_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 114 unit: def check_lang_id() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 4 id: 115 unit: def visualization() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 1 id: 116 unit: def choose_mode() file: obelics/visualization/global_visualization.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 1 id: 117 unit: def func_filter_final_processing_doc_level() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 1 id: 118 unit: def create_dataset_images_from_tar() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 119 unit: def create_dataset_images() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 120 unit: def save_commit_hash() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 121 unit: def compute_lang_id_pred_score() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 2 id: 122 unit: def transform_img() file: obelics/visualization/choose_filtering_parameters_web_documents_node_level.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 123 unit: def __call__() file: build_obelics/07_03_nsfw_image_removal.py start line: 0 end line: 0 size: 5 LOC McCabe index: 5 number of parameters: 2 id: 124 unit: def func_map_replace_images_by_urls() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 125 unit: def __init__() file: obelics/processors/warc_downloader.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 126 unit: def __init__() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 5 id: 127 unit: def __init__() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 128 unit: def _replace_nodes_matching_css_rules_with_text() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 2 id: 129 unit: def write_file() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 130 unit: def check_format() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 2 id: 131 unit: def check_word_repetition_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 132 unit: def compute_special_character_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 2 id: 133 unit: def check_flagged_word_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 134 unit: def check_punctuation_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 135 unit: def check_common_word_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 136 unit: def tokenization() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 3 id: 137 unit: def visualization() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 138 unit: def __init__() file: build_obelics/09_06_line_dedup.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 139 unit: def __init__() file: build_obelics/08_02_urldedup.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 140 unit: def __init__() file: build_obelics/11_03_set_img_urls_dedup.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 141 unit: def __init__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 142 unit: def __call__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 143 unit: def __init__() file: build_obelics/07_03_nsfw_image_removal.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 144 unit: def __init__() file: build_obelics/12_02_remove_opt_out_images.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 145 unit: def remove_texts_only_END_OF_DOCUMENT_TOKEN_TO_BE_REPLACED() file: build_obelics/13_final_processing.py start line: 0 end line: 0 size: 4 LOC McCabe index: 4 number of parameters: 1 id: 146 unit: def __init__() file: build_obelics/06_03_remove_image_duplicates.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 147 unit: def __init__() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 148 unit: def __call__() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 149 unit: def _unwrap_html_tree() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 4 number of parameters: 2 id: 150 unit: def get_image_urls() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 151 unit: def save_dataset() file: obelics/processors/web_document_extractor.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 152 unit: def check_number_images() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 3 id: 153 unit: def split_on_whitespace() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 154 unit: def check_character_repetition_ratio() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 155 unit: def unroll_list() file: build_obelics/08_01_prepare_urldedup.py start line: 0 end line: 0 size: 3 LOC McCabe index: 3 number of parameters: 1 id: 156 unit: def opt_in_out() file: build_obelics/12_01_find_opt_out_images.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 157 unit: def __init__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 158 unit: def make_selectolax_tree() file: obelics/utils/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 159 unit: def _strip_multiple_linebreaks() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 160 unit: def _strip_multiple_spaces() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 161 unit: def _remove_html_comments() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 162 unit: def _replace_line_break_tags() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 163 unit: def _remove_digits_string() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 164 unit: def __reduce__() file: build_obelics/09_06_line_dedup.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 165 unit: def __reduce__() file: build_obelics/08_02_urldedup.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 166 unit: def __reduce__() file: build_obelics/11_03_set_img_urls_dedup.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 167 unit: def __reduce__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 168 unit: def __reduce__() file: build_obelics/07_01_nsfw_image_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 169 unit: def __reduce__() file: build_obelics/07_03_nsfw_image_removal.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 170 unit: def __reduce__() file: build_obelics/12_02_remove_opt_out_images.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 171 unit: def __reduce__() file: build_obelics/06_03_remove_image_duplicates.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 172 unit: def __reduce__() file: obelics/processors/warc_downloader.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 173 unit: def tag() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 174 unit: def level() file: obelics/processors/pre_extraction_simplificator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 175 unit: def _remake_tree() file: obelics/processors/dom_tree_simplificator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 176 unit: def get_paths_subdatasets() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 177 unit: def remove_empty_els_in_list() file: obelics/processors/web_document_line_deduplication.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 2 id: 178 unit: def remove_empty_el_from_list() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 1 id: 179 unit: def remove_non_printing_characters() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 180 unit: def replace_digits_with_zeros() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 181 unit: def replace_unicode_punctuation() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 182 unit: def compute_perplexity_score() file: obelics/processors/web_document_filtering.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 183 unit: def __init__() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 184 unit: def set_title() file: obelics/visualization/web_document_visualization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 185 unit: def set_title() file: obelics/visualization/web_document_and_filtering_visualization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 186 unit: def non_empty_els_from_list() file: obelics/visualization/choose_filtering_parameters_web_documents_node_level.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 1