def _show_most_common_visited_webpages()

in identity-resolution/notebooks/identity-graph/nepytune/usecase/purchase_path.py [0:0]


def _show_most_common_visited_webpages(data, title, count):
    """Build a two-panel figure of the most frequently visited webpages.

    The left panel is a pie chart of event counts per webpage; the right
    panel is a sunburst that breaks every webpage down by the persistent
    ids whose sessions produced those events.

    Args:
        data: Mapping whose values each hold a ``"transient_ids"`` mapping;
            every value of that mapping holds a ``"sessions"`` iterable of
            objects exposing ``events`` (items with a ``url`` attribute)
            and ``persistentId``.
        title: Title passed to the figure layout.
        count: Maximum number of webpages (ranked by event count) to show.

    Returns:
        A ``go.Figure`` containing the pie chart and the sunburst.
    """

    def drop_qs(url):
        """Return ``url`` cut at the first ``?`` (query string removed)."""
        return url.partition("?")[0]

    def compute_data(data):
        """Map each query-less URL to the persistent id of every event on it."""
        res = defaultdict(list)
        for v in data.values():
            for values in v["transient_ids"].values():
                for session in values["sessions"]:
                    for event in session.events:
                        # One entry per event, so len() is the event count
                        # and set() gives the distinct users.
                        res[drop_qs(event.url)].append(session.persistentId)
        return res

    def sunburst_data(data):
        """Yield ``(id, parent_id, label, value, hover)`` rows for the sunburst.

        Explicit ids are emitted because the visible leaf labels
        (``persistent[:5]``) are not unique: the same user can appear under
        several websites, and distinct ids can share a prefix. Without
        ``ids`` plotly resolves parents by label and needs labels to be
        unique.
        """
        total_sum = sum(len(v) for v in data.values())
        root_id = "websites"
        yield root_id, "", "websites", total_sum, ""
        for i, (website, persistents) in enumerate(data.items()):
            website_id = f"Website {i}"
            yield (
                website_id, root_id, website_id,
                len(persistents),
                f"<br>website: {website}"
                f"<br>users: {len(set(persistents))}"
                f"<br>events: {len(persistents)}"
            )
            # groupby only merges adjacent equal items, hence the sort.
            for persistent, group in itertools.groupby(sorted(persistents)):
                events = len(list(group))
                yield (
                    f"{website_id}/{persistent}", website_id, persistent[:5],
                    events,
                    f"<br>persistentId: {persistent}"
                    f"<br>events: {events}"
                )

    events_data = compute_data(data)
    # Stable descending sort by number of events; keep the top `count`.
    most_common = dict(
        sorted(
            events_data.items(),
            key=lambda item: len(item[1]),
            reverse=True,
        )[:count]
    )

    pie_chart = go.Pie(
        labels=list(most_common),
        values=[len(persistents) for persistents in most_common.values()],
        marker=dict(line=dict(color='DarkSlateGrey', width=0.5)),
        domain=dict(column=0)
    )

    # The generator always yields the root row, so the unpacking is safe
    # even when no events were collected.
    ids, parents, labels, values, hovers = zip(*sunburst_data(most_common))

    sunburst = go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        # Each parent's value already equals the sum of its children.
        branchvalues="total",
        marker=dict(
            line=dict(width=0.5, color='DarkSlateGrey')
        ),
        hovertext=hovers,
        hoverinfo="text",
        domain=dict(column=1)
    )

    layout = go.Layout(
        grid=go.layout.Grid(columns=2, rows=1),
        margin=go.layout.Margin(t=50, l=0, r=0, b=0),
        title=title,
        legend_orientation="h"
    )

    return go.Figure([pie_chart, sunburst], layout)