def _build_networkx_graph_single()

in identity-resolution/notebooks/identity-graph/nepytune/usecase/purchase_path.py [0:0]


def _build_networkx_graph_single(query_results, thank_you_page, **kwargs):
    """Yield one ``(persistent_id, graph)`` pair per entry of the generated stats.

    For every persistent id returned by ``generate_stats(query_results, **kwargs)``
    a fresh ``nx.MultiGraph`` is built with these node labels:

    * ``persistentId`` -- the persistent id itself;
    * ``transientId``  -- one per key of the entry's ``"transient_ids"`` mapping;
    * ``session``      -- one per session, keyed by the hash of
      ``(session.transientId, first event)``;
    * ``event``        -- one per session event, keyed by ``hash(event)``;
    * ``website``      -- the event URL with its query string removed.

    Edges between consecutive events are labelled ``"purchase_path"`` when the
    session equals the transient's ``"purchase_session"`` and ``"visited"``
    otherwise. The node named by the entry's ``"first_device"`` gets
    ``size = 15``.

    Args:
        query_results: Forwarded unchanged to ``generate_stats``.
        thank_you_page: Accepted for signature compatibility; not read here.
        **kwargs: Forwarded unchanged to ``generate_stats``.

    Yields:
        Tuples of ``(persistent_id, graph)``.
    """
    event_label = "event"

    def strip_query(url):
        """Return *url* truncated just before its first ``?`` (if any)."""
        return url.partition("?")[0]

    def attach_event(target, event, label):
        """Add the event node, its website node and the edge joining them."""
        page = strip_query(event.url)
        target.add_node(page, label="website", url=page)
        event_key = hash(event)
        target.add_node(event_key, label=label, **event._asdict())
        target.add_edge(page, event_key, label="links_to")

    stats = generate_stats(query_results, **kwargs)
    for persistent_id, stats_entry in stats.items():
        graph = nx.MultiGraph()
        graph.add_node(persistent_id, label="persistentId", pid=persistent_id)

        for transient_id, device in stats_entry["transient_ids"].items():
            sessions = device["sessions"]

            # persistent -> transient
            graph.add_node(
                transient_id,
                label="transientId",
                uid=transient_id,
                sessions_count=len(sessions),
                time_on_device=device["sessions_duration"],
            )
            graph.add_edge(persistent_id, transient_id, label="has_identity")

            for session in sessions:
                is_purchase = session == device["purchase_session"]
                step_label = "purchase_path" if is_purchase else "visited"

                events = session.events
                opening_event = events[0]

                # transient -> session
                session_key = hash((session.transientId, opening_event))
                graph.add_node(
                    session_key,
                    label="session",
                    duration=get_session_duration(session),
                    events=len(events),
                )
                graph.add_edge(session_key, transient_id, label="session")

                # first event -> website (query string stripped)
                attach_event(graph, opening_event, event_label)

                # session -> first session event
                graph.add_edge(
                    session_key, hash(opening_event), label="session_start"
                )

                # NOTE(review): both members of every pair are attached again
                # here, so an event seen in more than one pair (or already
                # attached above) presumably receives repeated "links_to"
                # edges in the MultiGraph -- confirm whether that is intended.
                for earlier, later in consecutive_pairs(events):
                    attach_event(graph, earlier, event_label)
                    attach_event(graph, later, event_label)
                    graph.add_edge(hash(earlier), hash(later), label=step_label)

        # Highlight the first device's node.
        graph.nodes[stats_entry["first_device"]]["size"] = 15

        yield persistent_id, graph