def construct_graph()

in source/sagemaker/sagemaker_graph_entity_resolution/dgl_entity_resolution/graph.py [0:0]


def construct_graph(training_dir, training_edges, transient_nodes, transient_edges, website_nodes, website_edges):
    """Assemble a DGL heterograph over 'user', 'website' and 'domain' node types.

    Every file name argument is resolved relative to ``training_dir``.

    Args:
        training_dir: Directory that all other file names are joined onto.
        training_edges: File name of the user -> user edge list
            (stored as the 'same_entity' relation).
        transient_nodes: File name handed to ``get_features`` for user features.
        transient_edges: File name of the user -> website edge list
            (stored as the 'visits' relation).
        website_nodes: File name handed to ``get_website_features``.
        website_edges: File name of the website -> domain edge list
            (stored as the 'owned_by' relation).

    Returns:
        A 4-tuple ``(g, (user_features, website_features), id_to_node,
        reverse_etypes)`` where ``g`` is the heterograph, ``id_to_node`` is the
        mapping threaded through ``parse_edgelist``, and ``reverse_etypes``
        maps each parsed relation name to the name of its opposite direction.
    """

    def _full_path(f):
        return os.path.join(training_dir, f)

    # One row per edge file:
    # (file name, source type, sink type, forward relation, reverse relation, log template)
    edge_specs = (
        (training_edges, 'user', 'user', 'same_entity', 'same_entity_reversed',
         "Read user -> user training edgelist from {}"),
        (transient_edges, 'user', 'website', 'visits', 'visited_by',
         "Read user -> website edgelist from {}"),
        (website_edges, 'website', 'domain', 'owned_by', 'owns',
         "Read website -> domain edgelist from {}"),
    )

    edgelists, id_to_node, reverse_etypes = {}, {}, {}
    for file_name, src_type, dst_type, forward, backward, log_template in edge_specs:
        edge_path = _full_path(file_name)
        pairs, id_to_node = parse_edgelist(edge_path, id_to_node,
                                           source_type=src_type, sink_type=dst_type)
        print(log_template.format(edge_path))

        # Store every relation in both directions.
        edgelists[(src_type, forward, dst_type)] = pairs
        edgelists[(dst_type, backward, src_type)] = [(sink, source) for source, sink in pairs]

        reverse_etypes[forward] = backward
        reverse_etypes[backward] = forward

    # User features; the second value returned by get_features is not used here.
    user_ids = id_to_node['user']
    user_nodes_path = _full_path(transient_nodes)
    user_features, _ = get_features(user_ids, user_nodes_path)
    print("Got user features from {}".format(user_nodes_path))

    # Self loop for every user node. This is deliberately done after
    # get_features has run (NOTE(review): presumably get_features can add
    # entries to the user mapping -- confirm against its implementation).
    edgelists[('user', 'self_relation', 'user')] = [
        (node, node) for node in id_to_node['user'].values()
    ]

    # Website features.
    website_ids = id_to_node['website']
    website_nodes_path = _full_path(website_nodes)
    website_features = get_website_features(website_ids, website_nodes_path)
    print("Got website features from {}".format(website_nodes_path))

    g = dgl.heterograph(edgelists)
    summary = "Constructed heterograph with the following metagraph structure: Node types {}, Edge types{}"
    print(summary.format(g.ntypes, g.canonical_etypes))
    print("Number of user nodes : {}".format(g.number_of_nodes('user')))

    print(g)

    features = (user_features, website_features)
    return g, features, id_to_node, reverse_etypes