in smdebug/profiler/analysis/utils/pandas_data_analysis.py [0:0]
def get_device_usage_stats(self, device=None, utilization_ranges=None):
    """
    Count, per device type and node, how many system-metric samples fall into
    each utilization range.

    If ranges are not provided, >90, 10-90, <10 are considered. Range bounds are
    inclusive on both ends; a value sitting on a boundary shared by two ranges is
    assigned to the first range in the list that contains it.

    Side effect: a "ranges" column is written onto self.sys_metrics_df. It holds
    the matched (start, end) tuple for every row, or an empty tuple when the
    row's value falls in none of the ranges.

    :param device: List of Resource.cpu, Resource.gpu. Type: Resource
        None selects both CPU and GPU.
    :param utilization_ranges: list of tuples
    :return: DataFrame with one row per (type, nodeID) and one count column per
        range that occurred, missing combinations filled with 0. An empty
        DataFrame is returned when an argument is invalid, and the empty
        filtered frame is returned when no sample matches.
    """
    if device is None:
        resources = [Resource.CPU.value, Resource.GPU.value]
    else:
        if isinstance(device, Resource):
            device = [device]
        # Validate the elements too: a list holding anything other than Resource
        # members would otherwise fail on `.value` below instead of being reported.
        if not isinstance(device, list) or not all(isinstance(x, Resource) for x in device):
            get_logger().info(f"{device} should be of type list or Resource")
            return pd.DataFrame()
        resources = [x.value for x in device]

    if utilization_ranges is None:
        utilization_ranges = [(90, 100), (10, 90), (0, 10)]
    if not isinstance(utilization_ranges, list):
        get_logger().info(
            f"{utilization_ranges} should be a list of tuples containing the ranges"
        )
        return pd.DataFrame()
    if not utilization_ranges:
        get_logger().info(f"{utilization_ranges} cannot be empty")
        return pd.DataFrame()
    if any(len(utilization_range) != 2 for utilization_range in utilization_ranges):
        get_logger().info(
            f"Each interval in {utilization_ranges} must have a start and end value"
        )
        return pd.DataFrame()

    def range_of(value):
        """Return the first (start, end) range containing value, or () if none does."""
        value = float(value)
        for start, end in utilization_ranges:
            if start <= value <= end:
                return (start, end)
        return ()

    # Only the "value" column is needed, so map over that Series rather than
    # applying row-wise over the whole frame. Unlike DataFrame.apply(axis=1),
    # this also yields a Series when the frame has no rows.
    ranges = self.sys_metrics_df["value"].map(range_of)
    self.sys_metrics_df["ranges"] = ranges

    # Keep only the rows that landed in one of the ranges (non-empty tuple).
    in_some_range = ranges.map(len) > 0
    device_sys_df = self.sys_metrics_df[in_some_range]
    if device_sys_df.empty:
        return device_sys_df

    # Per-row mask of the requested device types. na=False keeps rows with a
    # missing "type" out of the result instead of producing a non-boolean mask.
    type_matches = device_sys_df["type"].str.contains("|".join(resources), na=False)
    usage_stats = device_sys_df[type_matches]
    if usage_stats.empty:
        # Nothing to aggregate for the requested devices.
        return usage_stats

    df_grouped = (
        usage_stats.groupby(["type", "nodeID", "ranges"])["ranges"].describe().reset_index()
    )
    # describe() on the object column also reports unique/top/freq; only the
    # per-group count is of interest here.
    df_grouped = df_grouped.drop(["unique", "top", "freq"], axis="columns")
    # Spread the ranges out into columns so each (type, nodeID) becomes one row.
    df_grouped = (
        df_grouped.set_index(["type", "nodeID"]).pivot(columns="ranges")["count"].reset_index()
    )
    df_grouped = df_grouped.fillna(0)
    return df_grouped