in identity-resolution/notebooks/identity-graph/nepytune/usecase/purchase_path.py [0:0]
def _build_networkx_graph_single(query_results, thank_you_page, **kwargs):
    """Yield one ``(persistent_id, graph)`` pair per persistent identity.

    The identities come from ``generate_stats(query_results, **kwargs)``.
    For each of them a fresh ``nx.MultiGraph`` is filled with:

    * a ``persistentId`` node,
    * one ``transientId`` node per entry of ``result_dict["transient_ids"]``,
      linked to the persistent node by a ``has_identity`` edge,
    * one ``session`` node per session of that transient id,
    * one node per session event plus a ``website`` node keyed by the event
      url with its query string removed (``links_to`` edge between them),
    * a ``session_start`` edge from the session to its first event, and
    * an edge between each consecutive pair of events, labelled
      ``purchase_path`` when the session equals the transient id's
      ``purchase_session`` and ``visited`` otherwise.

    The node named by ``result_dict["first_device"]`` gets ``size = 15``.

    Args:
        query_results: forwarded unchanged to ``generate_stats``.
        thank_you_page: accepted for interface compatibility; it is not
            read anywhere in this function.
        **kwargs: forwarded unchanged to ``generate_stats``.

    Yields:
        Tuples of ``(persistent_id, graph)``.
    """

    def strip_query(url):
        """Return ``url`` truncated just before its first ``?``, if any."""
        cut = url.find("?")
        return url if cut == -1 else url[:cut]

    def describe_session(session):
        """Return ``(node_id, node_attrs)`` for the node of ``session``."""
        node_id = hash((session.transientId, session.events[0]))
        node_attrs = {
            "duration": get_session_duration(session),
            "events": len(session.events),
        }
        return node_id, node_attrs

    def attach_event(graph, event, event_label):
        """Add ``event`` and its query-less website to ``graph`` and link them."""
        page = strip_query(event.url)
        graph.add_node(page, label="website", url=page)
        event_id = hash(event)
        # presumably events are namedtuples (they expose ``_asdict``) -- confirm
        graph.add_node(event_id, label=event_label, **event._asdict())
        graph.add_edge(page, event_id, label="links_to")

    stats = generate_stats(query_results, **kwargs)
    for persistent_id, result_dict in stats.items():
        graph = nx.MultiGraph()
        graph.add_node(persistent_id, label="persistentId", pid=persistent_id)

        for transient_id, device in result_dict["transient_ids"].items():
            # persistent -> transient
            graph.add_node(
                transient_id,
                label="transientId",
                uid=transient_id,
                sessions_count=len(device["sessions"]),
                time_on_device=device["sessions_duration"],
            )
            graph.add_edge(persistent_id, transient_id, label="has_identity")

            for session in device["sessions"]:
                event_label = "event"
                on_purchase_path = session == device["purchase_session"]
                step_label = "purchase_path" if on_purchase_path else "visited"

                # transient -> session
                session_id, session_node_attrs = describe_session(session)
                graph.add_node(session_id, label="session", **session_node_attrs)
                graph.add_edge(session_id, transient_id, label="session")

                # first event -> website without query string
                opening_event = session.events[0]
                attach_event(graph, opening_event, event_label)
                # session -> first session event
                graph.add_edge(session_id, hash(opening_event), label="session_start")

                # NOTE(review): events are passed to attach_event again here,
                # so each one is linked to its website more than once; if
                # ``nx`` is networkx, a MultiGraph keeps those as parallel
                # ``links_to`` edges -- confirm this is intended.
                for earlier, later in consecutive_pairs(session.events):
                    attach_event(graph, earlier, event_label)
                    attach_event(graph, later, event_label)
                    graph.add_edge(hash(earlier), hash(later), label=step_label)

        # Give the first device's node an explicit size attribute.
        graph.nodes[result_dict["first_device"]]["size"] = 15
        yield persistent_id, graph