in microservices/course_ingestion/services/clustering/hierarchical_clustering.py [0:0]
def _merge_nodes_by_title(nodes, child_key):
    """Fold nodes sharing a title into the first node carrying that title.

    For every later node whose "title" was already seen, its "document_ids"
    are appended to the first node's list, its "text" is joined onto the
    first node's text with "<p>", and, when the first node has a
    ``child_key`` list, the later node's ``child_key`` items are appended
    to it. The first node of each title is mutated in place.

    Args:
        nodes: sequence of dicts with "title", "document_ids" and "text".
        child_key: name of the nested list to concatenate
            ("learning_units" or "triples").

    Returns:
        List of the surviving (first-seen) nodes, in first-seen order.
    """
    first_by_title = {}
    for node in nodes:
        title = node["title"]
        if title not in first_by_title:
            first_by_title[title] = node
            continue
        kept = first_by_title[title]
        kept["document_ids"].extend(node["document_ids"])
        kept["text"] = "<p>".join([kept["text"], node["text"]])
        if child_key in kept:
            # A duplicate without the child list has nothing to contribute;
            # indexing it directly would raise KeyError.
            kept[child_key].extend(node.get(child_key, []))
    # dicts preserve insertion order, so this is first-occurrence order.
    return list(first_by_title.values())


def check_for_duplicate_titles(topic_tree, node_level,
                               create_learning_units, is_list=True):
    """Merges the nodes at particular level with same title.

    The tree is walked recursively from ``node_level`` downwards:
    course -> competency -> sub_competency -> learning_objective.
    Same-titled learning objectives (inside a sub competency) and
    same-titled learning units (inside a learning objective) are merged
    into the first node with that title. Nodes are modified in place.

    Args:
        topic_tree: at "course" level, a list of competency dicts. At the
            other levels, either the list of child nodes (``is_list=True``)
            or the parent dict holding them under "sub_competencies",
            "learning_objectives" or "learning_units" (``is_list=False``).
        node_level: one of "course", "competency", "sub_competency",
            "learning_objective".
        create_learning_units: when truthy, learning units inside each
            learning objective are de-duplicated as well.
        is_list: whether ``topic_tree`` is the bare child list or the
            parent dict. Ignored at "course" level.

    Returns:
        The de-duplicated child list when ``is_list`` is true (always at
        "course" level); otherwise ``topic_tree`` itself with its child
        list replaced. ``None`` for an unrecognised ``node_level``.
    """
    if node_level == "course":
        return [
            check_for_duplicate_titles(
                competency, "competency", create_learning_units,
                is_list=False)
            for competency in topic_tree
        ]

    if node_level == "competency":
        children_key = "sub_competencies"
        children = topic_tree if is_list else topic_tree[children_key]
        # Each sub competency is cleaned recursively first, then passed to
        # handle_duplicate_titles_sub_comp (defined elsewhere in this module).
        updated = [
            handle_duplicate_titles_sub_comp(
                check_for_duplicate_titles(
                    sub_competency, "sub_competency",
                    create_learning_units, is_list=False))
            for sub_competency in children
        ]
    elif node_level == "sub_competency":
        children_key = "learning_objectives"
        children = topic_tree if is_list else topic_tree[children_key]
        updated = _merge_nodes_by_title(children, "learning_units")
        if create_learning_units:
            # Merging objectives may have concatenated learning-unit lists
            # that now contain duplicate titles of their own.
            updated = [
                check_for_duplicate_titles(
                    learning_objective, "learning_objective",
                    create_learning_units, is_list=False)
                for learning_objective in updated
            ]
    elif node_level == "learning_objective":
        children_key = "learning_units"
        children = topic_tree if is_list else topic_tree[children_key]
        updated = _merge_nodes_by_title(children, "triples")
    else:
        # Unknown levels are left untouched and yield None, as before.
        return None

    if is_list:
        return updated
    topic_tree[children_key] = updated
    return topic_tree