# python/json_duplicate_filter.py
import copy


def update_datapoint(line, dataset):
    """Merge the questions and answers from ``line`` into the dataset entry
    for its URI, skipping any whose normalized text is already recorded."""
    curr_object = dataset[line["URI"]]
    for new_question in line["Questions"]:
        new_question_text = get_full_question(new_question)
        if len(new_question_text) > 0:
            new_question_text = normalize_answer(new_question_text)
            if new_question_text in curr_object["Questions"]:
                # Known question: add only the answers whose normalized text
                # is not yet stored under it.
                for new_answer in new_question["Answers"]:
                    new_answer_text = get_full_answer(new_answer)
                    if len(new_answer_text) > 0:
                        new_answer_text = normalize_answer(new_answer_text)
                        answers = curr_object["Questions"][new_question_text]["Answers"]
                        if new_answer_text not in answers:
                            answers[new_answer_text] = new_answer
            else:
                # New question: store a copy without its "Answers" list, then
                # rebuild "Answers" as a dict keyed by normalized answer text.
                condensed_question = copy.copy(new_question)
                condensed_question.pop("Answers")
                question_key = normalize_answer(get_full_question(condensed_question))
                curr_object["Questions"][question_key] = condensed_question
                curr_object["Questions"][question_key]["Answers"] = {}
                for answer in new_question["Answers"]:
                    curr_object["Questions"][question_key]["Answers"][
                        normalize_answer(get_full_answer(answer))
                    ] = answer
    return dataset
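

# Minimal usage sketch, not part of the original module: the helper stubs and
# the example data shapes below are assumptions made for illustration only.
# get_full_question / get_full_answer / normalize_answer are expected to be
# defined elsewhere in this file; the hypothetical placeholders here just make
# the sketch runnable on its own.
if __name__ == "__main__":

    def get_full_question(q):  # hypothetical stub, not the module's real helper
        return q.get("Text", "")

    def get_full_answer(a):  # hypothetical stub, not the module's real helper
        return a.get("Text", "")

    def normalize_answer(s):  # hypothetical stub, not the module's real helper
        return s.strip().lower()

    # One URI already holds a question with one answer, stored under
    # normalized keys, mirroring what update_datapoint itself produces.
    dataset = {
        "uri-1": {
            "Questions": {
                "what is x?": {
                    "Text": "What is X?",
                    "Answers": {"it is y.": {"Text": "It is Y."}},
                }
            }
        }
    }
    # An incoming line repeats the question but brings a new answer.
    line = {
        "URI": "uri-1",
        "Questions": [{"Text": "What is X?", "Answers": [{"Text": "It is Z."}]}],
    }

    dataset = update_datapoint(line, dataset)
    # The duplicate question is reused and only the unseen answer is added:
    # prints ['it is y.', 'it is z.']
    print(list(dataset["uri-1"]["Questions"]["what is x?"]["Answers"].keys()))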