in python/mhtml_to_json.py [0:0]
def collect_question(node):
    """Collect the itemprop-tagged fields of a question element into a dict.

    Each property is looked up with ``find_itemprop(node, <itemprop>)``.
    A property whose lookup returns ``None`` is skipped, so the result only
    contains keys for properties that were actually found.

    Keys that may appear, in insertion order:

    * ``name_markup``, ``text_markup`` -- the ``name`` / ``text`` elements
      passed through ``text_cleanup`` and then ``turn_into_string``.
    * ``date_created``, ``date_modified``, ``date_published`` -- the
      ``datetime`` attribute of the matching element (``None`` when the
      element is present but carries no such attribute).
    * ``upvote_count``, ``downvote_count``, ``comment_count``,
      ``answer_count`` -- the raw value read by ``_count_value``; no numeric
      conversion is performed here.

    Args:
        node: Element to search. NOTE(review): presumably an
            ElementTree/lxml-style element, judging by the ``.tag``,
            ``.get`` and ``.text`` accesses on the lookup results -- confirm
            against ``find_itemprop``.

    Returns:
        dict mapping the keys listed above to the extracted values.
    """
    question = {}

    # Lookups stay strictly sequential and in the original order:
    # text_cleanup may modify the tree, so later lookups must see the same
    # state they always did.
    markup_fields = (
        ("name", "name_markup"),
        ("text", "text_markup"),
    )
    for itemprop, key in markup_fields:
        element = find_itemprop(node, itemprop)
        if element is not None:
            question[key] = turn_into_string(text_cleanup(element))

    # date/time {created|modified|published}
    date_fields = (
        ("dateCreated", "date_created"),
        ("dateModified", "date_modified"),
        ("datePublished", "date_published"),
    )
    for itemprop, key in date_fields:
        element = find_itemprop(node, itemprop)
        if element is not None:
            question[key] = element.get("datetime")

    # upvote / downvote / comment / answer counts
    count_fields = (
        ("upvoteCount", "upvote_count"),
        ("downvoteCount", "downvote_count"),
        ("commentCount", "comment_count"),
        ("answerCount", "answer_count"),
    )
    for itemprop, key in count_fields:
        element = find_itemprop(node, itemprop)
        if element is not None:
            question[key] = _count_value(element)

    return question


def _count_value(element):
    """Return the raw count carried by *element*.

    A ``meta`` element holds its value in the ``content`` attribute; any
    other element is read through its ``.text``. The value is returned as
    found, without stripping or conversion to ``int``.
    """
    if element.tag == "meta":
        return element.get("content")
    return element.text