playground/process_analysis/status_transition_graph_vistualizer.py (124 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import enum import statistics from dataclasses import dataclass, field from itertools import groupby from operator import itemgetter import graphviz from playground.process_analysis.status_transition_graph import StatusTransitionGraph class StatisticLabelConfig(enum.Enum): AVG = "avg" MEDIAN = "med" IQR = "iqr" MIN_MAX = "min-max" # pylint: disable=too-many-instance-attributes @dataclass class VisualizerConfig: font: str = "Arial" fontsize: str = "12" sub_fontsize: str = "8" node_border_color: str = "darkslategray" edge_color: str = "darkslategray" category_fill_color: dict[str, str] = field( default_factory=lambda: { "TODO": "lightgray", "IN_PROGRESS": "yellow", "DONE": "green", } ) fallback_fill_color: str = "aliceblue" node_penwidth_factor: float = 10.0 edge_penwidth_factor: float = 40.0 class StatusTransitionGraphVisualizer: """ Visualize a status transition graph using Graphviz. """ def __init__(self, config: VisualizerConfig | None = None) -> None: if config is None: config = VisualizerConfig() self.config = config def visualize( self, source: StatusTransitionGraph, threshold: float = 1.0, label_statistic: StatisticLabelConfig = StatisticLabelConfig.AVG, ) -> graphviz.Digraph: """ Create a Graphviz digraph from a StatusTransitionGraph. Args: source: The StatusTransitionGraph to visualize. threshold: Number between 0.0 and 1.0, defaults to 1.0 (or 100%.) Exclude edges from the visualization that represent less than the percentage of total status transition within the threshold. """ dot_graph = graphviz.Digraph("Status Transitions", format="svg", strict=True) dot_graph.attr("graph", rankdir="TD") dot_graph.attr("node", color=self.config.node_border_color, **self.__default_attrs()) dot_graph.attr("edge", color=self.config.edge_color, **self.__default_attrs()) graph = source.graph for category, nodes in groupby(graph.nodes(data="category"), itemgetter(1)): with dot_graph.subgraph(name=f"{category}") as cluster: cluster.attr(label=str(category)) cluster.attr(rank=self.__category_rank(category)) cluster.attr("node", fillcolor=self.__category_color(category)) for node, _ in nodes: count = graph.nodes[node]["count"] penwidth = count / source.total_transition_count * self.config.node_penwidth_factor cluster.node( name=node, label=self.__node_label(node, count), penwidth=str(round(penwidth, 2)), ) for edge in graph.edges.data(): to_count = len(edge[2]["durations"]) if to_count > (1.00 - threshold) * source.total_transition_count: penwidth = to_count / source.total_transition_count * self.config.edge_penwidth_factor dot_graph.edge( edge[0], edge[1], labeltooltip=self.__edge_tooltip(edge[0], edge[1], edge[2]["durations"]), label=self.__edge_label(edge[2]["durations"], label_statistic), penwidth=str(round(penwidth, 2)), ) return dot_graph @staticmethod def is_dot_executable_available() -> bool: """ Check if the 'dot' executable is available. """ try: graphviz.version() return True except graphviz.ExecutableNotFound: return False def __category_rank(self, category: str) -> str: match category: case "TODO": return "min" case "DONE": return "max" case _: return "" def __category_color(self, category: str) -> str: return self.config.category_fill_color.get(category, self.config.fallback_fill_color) def __node_label(self, name: str, count: int) -> str: return f"<{name}<BR/>{self.__count_label(count)}>" def __edge_label(self, durations: list[float], label_statistic: StatisticLabelConfig) -> str: stat = "" match label_statistic: case StatisticLabelConfig.AVG: stat = f"{statistics.mean(durations):0.1f} days avg" case StatisticLabelConfig.MEDIAN: stat = f"{statistics.median(durations):0.1f} days med" case StatisticLabelConfig.IQR: if len(durations) >= 4: quantiles = statistics.quantiles(durations, n=4) stat = f"{quantiles[0]:0.1f} - {quantiles[2]:0.1f} days IQR (25-75%)" case StatisticLabelConfig.MIN_MAX: if len(durations) >= 2: stat = f"{min(durations):0.1f} - {max(durations):0.1f} days min-max" count = len(durations) return f"<{stat}<BR/>{self.__count_label(count)}>" def __count_label(self, count: int) -> str: return f'<FONT POINT-SIZE="{self.config.sub_fontsize}">({str(count)+"x"})</FONT>' def __edge_tooltip(self, from_state: str, to_state: str, durations: list[float]) -> str: lines = [] lines.append(f"{from_state} ⮕ {to_state} ({len(durations)}x)") lines.append(f"avg: {statistics.mean(durations):0.1f} days") lines.append(f"med: {statistics.median(durations):0.1f} days") if len(durations) >= 4: quantiles = statistics.quantiles(durations, n=4) lines.append(f"IQR (25-75%): {quantiles[0]:0.1f} - {quantiles[2]:0.1f} days") if len(durations) >= 2: lines.append(f"min-max: {min(durations):0.1f} - {max(durations):0.1f} days") return "\n".join(lines) def __default_attrs(self) -> dict: return { "style": "filled", "fontname": self.config.font, "fontsize": self.config.fontsize, }