def check_for_duplicate_titles()

in microservices/course_ingestion/services/clustering/hierarchical_clustering.py [0:0]


def _merge_nodes_by_title(nodes, child_key):
  """Collapses nodes that share a title into the first node with that title.

  The first node seen for each title is kept and updated in place: the
  "document_ids" of every later duplicate are appended to it, the "text"
  values are joined with "<p>", and the lists stored under child_key are
  concatenated.

  Args:
    nodes: list of node dicts, each with "title", "document_ids" and "text".
    child_key: name of the optional list-valued key whose contents are
      concatenated ("learning_units" or "triples").

  Returns:
    A new list holding one node per distinct title, in first-seen order.
  """
  first_by_title = {}
  for node in nodes:
    title = node["title"]
    if title not in first_by_title:
      first_by_title[title] = node
      continue
    kept = first_by_title[title]
    kept["document_ids"].extend(node["document_ids"])
    kept["text"] = "<p>".join([kept["text"], node["text"]])
    # Either side may lack child_key: merge only what the duplicate actually
    # carries, and create the list on the kept node when it has none yet, so
    # a missing key neither raises KeyError nor silently drops children.
    if child_key in node:
      kept.setdefault(child_key, []).extend(node[child_key])
  # dicts preserve insertion order, so this is first-seen order.
  return list(first_by_title.values())


def check_for_duplicate_titles(topic_tree, node_level,
    create_learning_units, is_list=True):
  """Merges the nodes at particular level with same title.

  Walks the topic tree downwards from node_level. Learning objectives with
  the same title inside one sub competency are merged into the first one,
  and, when create_learning_units is truthy, learning units with the same
  title inside one learning objective are merged as well. Kept nodes and
  topic_tree itself are modified in place.

  Args:
    topic_tree: either the list of child nodes to process (is_list=True) or
      the parent node dict that holds them (is_list=False). For
      node_level "course" it is always iterated as a list of competencies.
    node_level: level of the parent of the nodes being processed; one of
      "course", "competency", "sub_competency" or "learning_objective".
    create_learning_units: when truthy, learning units under every learning
      objective are de-duplicated too.
    is_list: whether topic_tree is the list of children rather than the
      parent dict. Ignored for node_level "course".

  Returns:
    The processed list of children when is_list is True (and always for
    "course"); otherwise topic_tree with its children key replaced by the
    processed list. None when node_level is not one of the known levels.
  """
  if node_level == "course":
    return [
      check_for_duplicate_titles(
        competency, "competency", create_learning_units, is_list=False)
      for competency in topic_tree
    ]

  children_key_by_level = {
    "competency": "sub_competencies",
    "sub_competency": "learning_objectives",
    "learning_objective": "learning_units",
  }
  if node_level not in children_key_by_level:
    return None
  children_key = children_key_by_level[node_level]
  children = topic_tree if is_list else topic_tree[children_key]

  if node_level == "competency":
    processed = []
    for sub_competency in children:
      sub_competency = check_for_duplicate_titles(
        sub_competency, "sub_competency", create_learning_units,
        is_list=False)
      processed.append(handle_duplicate_titles_sub_comp(sub_competency))
  elif node_level == "sub_competency":
    processed = _merge_nodes_by_title(children, "learning_units")
    if create_learning_units:
      # Merging objectives may have brought together units with equal
      # titles, so de-duplicate one level further down.
      processed = [
        check_for_duplicate_titles(
          learning_objective, "learning_objective",
          create_learning_units, is_list=False)
        for learning_objective in processed
      ]
  else:
    processed = _merge_nodes_by_title(children, "triples")

  if is_list:
    return processed
  topic_tree[children_key] = processed
  return topic_tree