in obelics/processors/pre_extraction_simplificator.py [0:0]
def make_tree(self, selectolax_node, page_url, path_in_tree=None):
    """Recursively convert a selectolax node into a ``Node`` tree.

    Every visited node is stamped with the current value of
    ``self.num_nodes``, and the counter is incremented before any child is
    visited, so a parent always receives a smaller index than its
    descendants.

    Args:
        selectolax_node: The node to convert. It must expose ``.tag``,
            ``.text(...)`` and ``.iter(include_text=True)``.
        page_url: Forwarded unchanged to ``simplify_media_node`` for media
            nodes and to every recursive call.
        path_in_tree: List of ``[tag, index]`` pairs describing the
            ancestors of ``selectolax_node``. ``None`` (the default) means
            an empty path. The list passed in is never mutated.

    Returns:
        A ``Node`` whose ``path_in_tree`` is the ancestor path extended with
        this node's ``[tag, index]`` pair. Depending on the tag:

        * tag in ``MEDIA_CONTAIN_INTERESTING_ATTRIBUTES_SET``: a leaf with
          ``media_info`` taken from ``simplify_media_node``; its children
          are not visited.
        * tag ``"-text"``: a leaf carrying the node's own text, unstripped.
        * any other tag: an inner node whose children are built from the
          nodes yielded by ``selectolax_node.iter(include_text=True)``.
    """
    tag = selectolax_node.tag

    # ``None`` sentinel instead of a mutable ``[]`` default: a default list
    # is created once and shared by every call that omits the argument.
    parent_path = [] if path_in_tree is None else path_in_tree

    # ``+`` builds a new list, so sibling subtrees never share a path object.
    path_in_tree = parent_path + [[tag, self.num_nodes]]
    self.num_nodes += 1

    if tag in MEDIA_CONTAIN_INTERESTING_ATTRIBUTES_SET:
        # Media elements are treated as leaves; only their simplified
        # attributes are kept.
        return Node(
            path_in_tree=path_in_tree,
            media_info=simplify_media_node(selectolax_node, page_url=page_url),
            text="",
            children=[],
        )

    if tag == "-text":
        # deep=False keeps only this node's own text; strip=False preserves
        # whitespace exactly as it appears.
        return Node(
            path_in_tree=path_in_tree,
            media_info=None,
            text=selectolax_node.text(deep=False, separator="", strip=False),
            children=[],
        )

    # Generic element: recurse into children, text nodes included.
    children = [
        self.make_tree(child, page_url=page_url, path_in_tree=path_in_tree)
        for child in selectolax_node.iter(include_text=True)
    ]
    return Node(
        path_in_tree=path_in_tree,
        media_info=None,
        text="",
        children=children,
    )