# construct_graph() — from src/graph_construction.py

def _build_name_vector(name, total_num_words, word_index_mapping, tfidfs, generic_tfidfs):
    """Return a tf-idf feature vector (np.ndarray of len total_num_words) for a node name.

    Per-document tf-idf is preferred; falls back to the corpus-wide maximum
    (generic_tfidfs) when the word is absent from this document's scores.
    Words missing from the vocabulary are reported and skipped.
    """
    vector = np.zeros(total_num_words)
    for word in name.split():
        try:
            score = tfidfs.get(word)
            if score is None:
                score = generic_tfidfs[word]
            vector[word_index_mapping[word]] = score
        except KeyError:
            # Word never seen by compute_base_features (e.g. coref-substituted
            # mention text) — leave its slot at 0 and move on.
            print(f"Exception Occurred for word: {word}")
    return vector


def _resolve_name(triple, arg_key, coref_info):
    """Return the canonical mention text for triple[arg_key].

    Uses the coreference cluster's unique name when the span's token range
    ("start-end") is a known mention; otherwise the raw extracted span.
    """
    uid = "{}-{}".format(triple[arg_key]["index"][0], triple[arg_key]["index"][1])
    mention = coref_info.get(uid)
    if mention is not None:
        return mention["unique"]
    return triple[arg_key]["span"]


def _get_or_add_node(graph, name, vector, node_names_list, node_vec_list):
    """Return the graph index for `name`, merging near-duplicate nodes.

    If similar_match finds an existing similar node, its weight is bumped;
    otherwise a new node is appended. Node ids are dense integers, so the
    next free id is always len(node_names_list).
    """
    idx = similar_match(name, node_names_list, vector, node_vec_list)
    if idx == -1:
        idx = len(node_names_list)
        graph.add_node(idx, text=name, weight=1)
        node_names_list.append(name)
        node_vec_list.append(vector)
    else:
        graph.nodes[idx]["weight"] += 1
    return idx


def construct_graph(docs, coref_predictor, oie_predictor):
    """Build an entity graph from documents via coreference + OpenIE.

    For every OIE triple (ARG0, V, ARG1) in every document, ARG0 and ARG1
    become (possibly merged) nodes and V becomes a weighted predicate on the
    edge between them. Coreference resolution canonicalizes mention text;
    tf-idf vectors drive similarity-based node/predicate merging.

    Parameters
    ----------
    docs : iterable of str — raw input documents.
    coref_predictor : model passed through to extract_coref.
    oie_predictor : model passed through to extract_oie.

    Returns
    -------
    (graph_info, extra_info) where graph_info = {'graph': nx.Graph,
    'directed_edges': {"sub-obj": 1}} records original edge direction, and
    extra_info carries the intermediate coref/OIE/tf-idf artifacts.
    """
    graph = nx.Graph()
    node_names_list = []
    node_vec_list = []

    # --- Coreference pass: tokenize docs and collect mention clusters. ---
    tokenized_docs = []
    coref_info_list = []
    for d in docs:
        dtokens, cinfo = extract_coref(coref_predictor, d)
        tokenized_docs.append(" ".join(dtokens))
        coref_info_list.append(cinfo)

    tfidfs_list, word_index_mapping, total_num_words = compute_base_features(tokenized_docs)

    # Corpus-wide fallback: per word, the maximum tf-idf over all documents.
    # Handles the case where a coref-substituted name is scored in another
    # document than the one currently being processed.
    generic_tfidfs = {}
    for tfidf in tfidfs_list:
        for word, score in tfidf.items():
            prev = generic_tfidfs.get(word)
            generic_tfidfs[word] = score if prev is None else max(prev, score)

    # --- OIE pass: turn triples into nodes and predicate-labelled edges. ---
    oies_list = []
    directed_edges = {}
    for ind, d in enumerate(docs):
        oies = extract_oie(oie_predictor, d)
        coref_info = coref_info_list[ind]
        tfidfs = tfidfs_list[ind]
        oies_list.append(oies)
        for triple in oies:
            # Subject node (ARG0).
            sub_name = _resolve_name(triple, "ARG0", coref_info)
            sub_vec = _build_name_vector(
                sub_name, total_num_words, word_index_mapping, tfidfs, generic_tfidfs)
            sub_index = _get_or_add_node(
                graph, sub_name, sub_vec, node_names_list, node_vec_list)

            # Object node (ARG1).
            obj_name = _resolve_name(triple, "ARG1", coref_info)
            obj_vec = _build_name_vector(
                obj_name, total_num_words, word_index_mapping, tfidfs, generic_tfidfs)
            obj_index = _get_or_add_node(
                graph, obj_name, obj_vec, node_names_list, node_vec_list)

            # Edge predicate (V): merge similar predicates on an existing edge,
            # else create the edge and remember its original direction.
            pred_name = triple["V"]["span"]
            if graph.has_edge(sub_index, obj_index):
                preds = graph[sub_index][obj_index]["preds"]
                # NOTE: text-only matching here — no vector similarity.
                sim_ind = similar_match(pred_name, [p["text"] for p in preds])
                if sim_ind == -1:
                    preds.append({"text": pred_name, "weight": 1})
                else:
                    preds[sim_ind]["weight"] += 1
            else:
                graph.add_edge(sub_index, obj_index,
                               preds=[{"text": pred_name, "weight": 1}])
                directed_edges[f"{sub_index}-{obj_index}"] = 1

    extra_info = {
        'coref_info': coref_info_list,
        'oie_info': oies_list,
        'tfidfs': tfidfs_list,
        'generic_tfidfs': generic_tfidfs,
        'word_index_mapping': word_index_mapping,
        'total_num_words': total_num_words,
    }
    graph_info = {'graph': graph, 'directed_edges': directed_edges}
    return graph_info, extra_info