def search_tree()

in python/mhtml_to_json.py [0:0]


def _itemtype_of(node):
    """Return ``node``'s ``itemtype`` attribute, or ``""`` if it has none."""
    if "itemtype" in node.keys():
        return node.get("itemtype")
    return ""


def _detach_from_parent(node):
    """Remove ``node`` from its parent; a parentless node is left alone."""
    parent = node.getparent()
    if parent is not None:
        parent.remove(node)


def search_tree(node, json_context):
    """Recursively fold schema.org items under ``node`` into ``json_context``.

    Children are visited before the node itself is collected. Every node
    whose ``itemtype`` mentions a schema.org Question, Answer or Person is
    detached from its parent once handled, so the tree is mutated in place.

    Args:
        node: Tree element offering ``keys()``, ``get()``, ``getparent()``,
            ``remove()`` and iteration over its children (presumably an
            lxml element -- confirm against the caller).
        json_context: Dict receiving the collected data. Answer items are
            only accepted while it holds an ``"Answers"`` list; each one
            gets a fresh dict appended to that list, which then serves as
            the context for the answer's whole subtree.

    Returns:
        None. Results are delivered through ``json_context`` and through
        the removal of handled nodes from the tree.
    """
    kind = _itemtype_of(node)

    if "//schema.org/Answer" in kind:
        if "Answers" not in json_context:
            # Stacked item (not in the schema.org definition): the current
            # context has no "Answers" list to hold it, so the subtree is
            # dropped without being visited.
            _detach_from_parent(node)
            return
        # Open a new answer record; everything below this node, and the
        # node's own data, is written into that record.
        answers = json_context["Answers"]
        answers.append({})
        json_context = answers[-1]

    for child in node:
        search_tree(child, json_context)

    # Children are done; now classify and collect this node itself.
    kind = _itemtype_of(node)
    if "//schema.org/Question" in kind:
        # A question reaching a context without an "Answers" list is a
        # stacked item: it is detached below without being collected.
        if "Answers" in json_context:
            json_context.update(collect_question(node))
    elif "//schema.org/Answer" in kind:
        json_context.update(collect_answer(node))
    elif "//schema.org/Person" in kind:
        person = collect_person(node)
        if person is not None:
            json_context.update(person)
    else:
        # No itemtype, or one that is not handled here: keep the node.
        return

    _detach_from_parent(node)