in obelics/processors/dom_tree_simplificator.py [0:0]
def _strip_special_divs(self, selectolax_tree):
    """Remove every ``div`` labelled as a footer, header, navigation or menu.

    A ``div`` is removed when its ``id``, ``class`` or ``title`` attribute is
    exactly one of "footer", "header", "navigation", "nav", "navbar" or
    "menu". The comparison is made on the whole attribute value, so a
    multi-token value such as ``class="footer dark"`` is NOT a match, and
    tags other than ``div`` are never removed.

    Args:
        selectolax_tree: Parsed tree exposing a ``root`` node. Nodes must
            provide ``traverse()``, ``tag``, ``attributes`` and
            ``decompose()``.

    Returns:
        The same tree object, modified in place. A tree without a root node
        is returned untouched.
    """
    special_div_ids = ("footer", "header", "navigation", "nav", "navbar", "menu")
    # Checked in this order; the first attribute that matches is enough.
    inspected_attributes = ("id", "class", "title")

    while True:
        root = selectolax_tree.root
        if root is None:
            # Nothing to traverse (e.g. an empty document): nothing to strip.
            return selectolax_tree

        # Find one node to remove, remove it right away, then restart the
        # traversal from the root. Continuing the interrupted traversal is
        # avoided on purpose: `decompose` takes the node out of the tree, so
        # the iterator could otherwise walk into nodes that no longer exist.
        for node in root.traverse():
            if node.tag != "div":
                continue
            attributes = node.attributes
            if any(
                name in attributes and attributes[name] in special_div_ids
                for name in inspected_attributes
            ):
                node.decompose()
                break
        else:
            # A full pass found nothing left to remove.
            return selectolax_tree