in python/mhtml_to_json.py [0:0]
def search_tree(node, json_context):
    """Walk ``node`` depth-first and fold schema.org items into ``json_context``.

    The walk has two phases per node:

    * Before descending: a node whose ``itemtype`` mentions
      ``//schema.org/Answer`` opens a fresh dict at the end of
      ``json_context["Answers"]``; that dict becomes the context for the
      node's whole subtree. If the current context has no ``"Answers"``
      entry, the node is detached and its subtree is not visited.
    * After descending: Question / Answer / Person nodes are handed to
      ``collect_question`` / ``collect_answer`` / ``collect_person``, the
      result is merged into the active context with ``dict.update`` and the
      node is detached from its parent. Only the first matching type (in
      that order) is applied.

    Args:
        node: Tree element exposing ``get``, ``getparent``, ``remove`` and
            iteration over its children (lxml-style element API).
        json_context: Dict that receives the collected fields. When it has an
            ``"Answers"`` key, the value must be a list, because new answer
            dicts are appended to it.

    Returns:
        None. ``json_context`` and the tree are modified in place.
    """
    item_type = node.get("itemtype")

    if item_type is not None and "//schema.org/Answer" in item_type:
        if "Answers" not in json_context:
            # No answer list to attach to in this context (presumably an
            # answer nested inside another answer, which schema.org does not
            # define) -- drop the node and skip its subtree.
            _detach_node(node)
            return
        json_context["Answers"].append({})
        json_context = json_context["Answers"][-1]

    # Iterate over a snapshot: the recursive calls detach children from
    # ``node``, and the walk must not depend on the live child iterator
    # tolerating removals.
    for child in list(node):
        search_tree(child, json_context)

    if item_type is None:
        return

    if "//schema.org/Question" in item_type:
        # A question reached while the context has no "Answers" entry is a
        # stacked question (not in the schema.org definition); it is removed
        # without being collected.
        if "Answers" in json_context:
            json_context.update(collect_question(node))
        _detach_node(node)
    elif "//schema.org/Answer" in item_type:
        json_context.update(collect_answer(node))
        _detach_node(node)
    elif "//schema.org/Person" in item_type:
        element = collect_person(node)
        # collect_person may return None; in that case nothing is merged.
        if element is not None:
            json_context.update(element)
        _detach_node(node)


def _detach_node(node):
    """Remove ``node`` from its parent; do nothing when it has no parent."""
    parent = node.getparent()
    if parent is not None:
        parent.remove(node)