build_obelics/02_bis_extract_html_get_image_urls_new_rules.py [96:117]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        extraction_params = yaml.load(f, Loader=yaml.FullLoader)

    # Build the DOM tree simplificator from the "dom_tree_simplificator"
    # section of the loaded extraction parameters. Every option is looked up
    # explicitly, so a missing key raises KeyError instead of silently falling
    # back to a constructor default.
    dom_simplificator_params = extraction_params["dom_tree_simplificator"]
    dom_tree_simplificator = DOMTreeSimplificator(
        strip_multiple_linebreaks=dom_simplificator_params["strip_multiple_linebreaks"],
        strip_multiple_spaces=dom_simplificator_params["strip_multiple_spaces"],
        remove_html_comments=dom_simplificator_params["remove_html_comments"],
        replace_line_break_tags=dom_simplificator_params["replace_line_break_tags"],
        unwrap_tags=dom_simplificator_params["unwrap_tags"],
        strip_tags=dom_simplificator_params["strip_tags"],
        strip_special_divs=dom_simplificator_params["strip_special_divs"],
        remove_dates=dom_simplificator_params["remove_dates"],
        remove_empty_leaves=dom_simplificator_params["remove_empty_leaves"],
        unnest_nodes=dom_simplificator_params["unnest_nodes"],
        remake_tree=dom_simplificator_params["remake_tree"],
        css_rules=dom_simplificator_params["css_rules"],
        css_rules_replace_with_text=dom_simplificator_params["css_rules_replace_with_text"],
    )

    # Same pattern for the "pre_extraction_simplificator" section. The section
    # is only read after the DOM simplificator has been constructed.
    pre_extraction_params = extraction_params["pre_extraction_simplificator"]
    pre_extraction_simplificator = PreExtractionSimplificator(
        only_text_image_nodes=pre_extraction_params["only_text_image_nodes"],
        format_texts=pre_extraction_params["format_texts"],
        merge_consecutive_text_nodes=pre_extraction_params["merge_consecutive_text_nodes"],
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



obelics/callers/extract_web_documents.py [125:146]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        extraction_params = yaml.load(f, Loader=yaml.FullLoader)

    # Build the DOM tree simplificator from the "dom_tree_simplificator"
    # section of the loaded extraction parameters. Every option is looked up
    # explicitly, so a missing key raises KeyError instead of silently falling
    # back to a constructor default.
    dom_simplificator_params = extraction_params["dom_tree_simplificator"]
    dom_tree_simplificator = DOMTreeSimplificator(
        strip_multiple_linebreaks=dom_simplificator_params["strip_multiple_linebreaks"],
        strip_multiple_spaces=dom_simplificator_params["strip_multiple_spaces"],
        remove_html_comments=dom_simplificator_params["remove_html_comments"],
        replace_line_break_tags=dom_simplificator_params["replace_line_break_tags"],
        unwrap_tags=dom_simplificator_params["unwrap_tags"],
        strip_tags=dom_simplificator_params["strip_tags"],
        strip_special_divs=dom_simplificator_params["strip_special_divs"],
        remove_dates=dom_simplificator_params["remove_dates"],
        remove_empty_leaves=dom_simplificator_params["remove_empty_leaves"],
        unnest_nodes=dom_simplificator_params["unnest_nodes"],
        remake_tree=dom_simplificator_params["remake_tree"],
        css_rules=dom_simplificator_params["css_rules"],
        css_rules_replace_with_text=dom_simplificator_params["css_rules_replace_with_text"],
    )

    # Same pattern for the "pre_extraction_simplificator" section. The section
    # is only read after the DOM simplificator has been constructed.
    pre_extraction_params = extraction_params["pre_extraction_simplificator"]
    pre_extraction_simplificator = PreExtractionSimplificator(
        only_text_image_nodes=pre_extraction_params["only_text_image_nodes"],
        format_texts=pre_extraction_params["format_texts"],
        merge_consecutive_text_nodes=pre_extraction_params["merge_consecutive_text_nodes"],
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



