in identity-resolution/notebooks/identity-graph/nepytune/usecase/purchase_path.py [0:0]
def _show_most_common_visited_webpages(data, title, count):
    """Build a two-panel figure of the ``count`` most visited webpages.

    The left panel is a pie chart of event counts per webpage; the right
    panel is a sunburst that splits every webpage into the persistent ids
    that generated those events.

    Args:
        data: Mapping whose values hold a ``"transient_ids"`` mapping; each
            of its values holds a ``"sessions"`` iterable of objects that
            expose ``events`` (items with a ``url``) and ``persistentId``.
        title: Title placed on the figure layout.
        count: Number of webpages (ranked by event count) to keep.

    Returns:
        A ``go.Figure`` containing the pie and sunburst traces.
    """

    def drop_qs(url):
        """Return ``url`` without its query string (first ``?`` onwards)."""
        return url.partition("?")[0]

    def compute_data(data):
        """Map each query-less URL to the persistent ids of its events.

        One list entry is appended per event, so an id appears as many
        times as it generated events on that URL.
        """
        res = defaultdict(list)
        for v in data.values():
            for values in v["transient_ids"].values():
                for session in values["sessions"]:
                    for event in session.events:
                        res[drop_qs(event.url)].append(session.persistentId)
        return res

    def sunburst_data(data):
        """Yield ``(id, parent, label, value, hovertext)`` sunburst rows.

        Explicit ids are emitted because leaf labels are only a 5-character
        prefix of the persistent id and the same id can occur under several
        websites; without ids plotly matches parents against labels, which
        must then be unique.
        """
        root_id = "websites"
        total_sum = sum(len(v) for v in data.values())
        yield root_id, "", "websites", total_sum, ""

        for i, (website, persistents) in enumerate(data.items()):
            website_id = f"Website {i}"
            yield (
                website_id, root_id, website_id,
                len(persistents),
                f"<br>website: {website}"
                f"<br>users: {len(set(persistents))}"
                f"<br>events: {len(persistents)}"
            )
            # groupby only merges adjacent equal items, hence the sort.
            for persistent, group in itertools.groupby(sorted(persistents)):
                events = sum(1 for _ in group)
                yield (
                    # Namespaced by website so the id is unique tree-wide.
                    f"{website_id}/{persistent}", website_id, persistent[:5],
                    events,
                    f"<br>persistentId: {persistent}"
                    f"<br>events: {events}"
                )

    events_data = compute_data(data)
    # Keep the `count` URLs with the most events (ties keep insertion order).
    most_common = dict(
        sorted(events_data.items(), key=lambda x: -len(x[1]))[:count]
    )
    most_common_counts = {k: len(v) for k, v in most_common.items()}

    pie_chart = go.Pie(
        labels=list(most_common_counts.keys()),
        values=list(most_common_counts.values()),
        marker=dict(line=dict(color='DarkSlateGrey', width=0.5)),
        domain=dict(column=0)
    )

    # The generator always yields the root row, so the unpacking never
    # receives an empty sequence.
    ids, parents, labels, values, hovers = zip(*sunburst_data(most_common))

    sunburst = go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        marker=dict(
            line=dict(width=0.5, color='DarkSlateGrey')
        ),
        hovertext=hovers,
        hoverinfo="text",
        domain=dict(column=1)
    )

    layout = go.Layout(
        grid=go.layout.Grid(columns=2, rows=1),
        margin=go.layout.Margin(t=50, l=0, r=0, b=0),
        title=title,
        legend_orientation="h"
    )

    return go.Figure([pie_chart, sunburst], layout)