in smdebug/profiler/analysis/notebook_utils/heatmap.py [0:0]
def preprocess_system_metrics(self, events, system_metrics):
    """Group system metric events and derive per-dimension and per-node totals.

    Every event is filed under
    ``system_metrics[node_id][dimension][name]``; each series is then
    converted to a numpy array, a ``"total"`` series is added to every
    dimension, and a ``"node_total"`` entry is added next to the nodes.

    Side effects on ``self``:
        * ``width`` is set to the length of the shortest series seen
          (it stays ``np.inf`` when there is no series at all).
        * ``available_dimensions`` / ``available_events`` are extended with
          names not yet listed; ``"total"`` is added to the events once.
        * ``filtered_events`` / ``filtered_dimensions`` are rebuilt as sets by
          matching ``select_events`` / ``select_dimensions`` (used as regular
          expressions) against the available names.
        * The selections and the filter results are printed.

    Args:
        events: iterable of objects exposing ``node_id``, ``dimension``,
            ``name`` and ``value`` attributes.
        system_metrics: dict that is filled in place; callers are expected to
            pass a dict whose existing series (if any) are still plain lists,
            because new values are appended to them.

    Returns:
        The same ``system_metrics`` dict, now also holding the ``"total"``
        series and the ``"node_total"`` entry.
    """
    # read all available system metric events and store them in dict
    for event in events:
        node_metrics = system_metrics.setdefault(event.node_id, {})
        dimension_metrics = node_metrics.setdefault(event.dimension, {})
        dimension_metrics.setdefault(event.name, []).append(event.value)

    # number of datapoints
    self.width = np.inf

    # preprocess data
    for node_metrics in system_metrics.values():
        for dimension, dimension_metrics in node_metrics.items():
            if dimension not in self.available_dimensions:
                self.available_dimensions.append(dimension)

            # only existing keys are reassigned below, so iterating the dict
            # while updating its values is safe
            for event_name in dimension_metrics:
                # list of available events
                if event_name not in self.available_events:
                    self.available_events.append(event_name)

                # convert to numpy
                series = np.array(dimension_metrics[event_name])

                # we may not have the exact same number of measurements per metric
                if series.shape[0] < self.width:
                    self.width = series.shape[0]

                # convert metrics to percentages: these dimensions are scaled
                # relative to the largest value of the series
                # NOTE(review): presumably the other dimensions are already
                # reported as percentages - confirm against the metrics reader
                if dimension in ["Algorithm", "Platform", ""]:
                    max_value = np.max(series)
                    if max_value != 0:
                        series = series / max_value
                    series = series * 100

                dimension_metrics[event_name] = series

    # compute total utilization per event dimension
    for node_metrics in system_metrics.values():
        for dimension_metrics in node_metrics.values():
            n = len(dimension_metrics)
            # zip stops at the shortest series, so the total is as long as
            # the shortest series of the dimension
            total = [sum(x) for x in zip(*dimension_metrics.values())]
            dimension_metrics["total"] = np.array(total) / n

    # the method may run many times on the same object; list "total" only once
    if "total" not in self.available_events:
        self.available_events.append("total")

    # snapshot the node names before the aggregate entry is added
    nodes = list(system_metrics.keys())
    system_metrics["node_total"] = {}

    # compute total utilization per worker node
    # (without any node there is nothing to aggregate and nodes[0] would fail)
    if nodes:
        for dimension in system_metrics[nodes[0]]:
            node_total = []
            for node in nodes:
                dimension_metrics = system_metrics[node].get(dimension)
                if dimension_metrics is None:
                    # this node did not report the dimension; it adds nothing
                    # to the sum but still counts in the divisor below
                    continue
                node_series = dimension_metrics["total"]
                if len(node_total) > 0:
                    # series may differ in length: only the overlapping prefix
                    # is summed, the accumulator keeps its current length
                    overlap = min(len(node_total), node_series.shape[0])
                    node_total[:overlap] += node_series[:overlap]
                else:
                    # copy so the node's own "total" series is never modified
                    node_total = node_series.copy()
            system_metrics["node_total"][dimension] = {"total": node_total / len(nodes)}

    # filter events and dimensions
    self.filtered_events = []
    print(f"select events:{self.select_events}")
    self.filtered_dimensions = []
    print(f"select dimensions:{self.select_dimensions}")

    for metric in self.select_events:
        pattern = re.compile(r".*" + metric)
        self.filtered_events.extend(filter(pattern.search, self.available_events))
    self.filtered_events = set(self.filtered_events)
    print(f"filtered_events:{self.filtered_events}")

    for metric in self.select_dimensions:
        pattern = re.compile(metric)
        self.filtered_dimensions.extend(filter(pattern.search, self.available_dimensions))
    self.filtered_dimensions = set(self.filtered_dimensions)
    print(f"filtered_dimensions:{self.filtered_dimensions}")

    return system_metrics