def get_device_usage_stats()

in smdebug/profiler/analysis/utils/pandas_data_analysis.py [0:0]


    def get_device_usage_stats(self, device=None, utilization_ranges=None):
        """
        Count, per device type and node, how many system-metric samples fall into
        each utilization range. If ranges are not provided,
        >90, 10-90, <10 are considered.

        Range bounds are inclusive on both ends and a sample is assigned to the FIRST
        range in `utilization_ranges` that contains it. With the default ranges a value
        of exactly 90 is therefore counted under (90, 100) and a value of exactly 10
        under (10, 90). Samples that match no range are left out of the result.

        Side effect: a "ranges" column holding the matched (start, end) tuple, or ()
        when nothing matched, is written to `self.sys_metrics_df` on every call.

        :param device: List of Resource.cpu, Resource.gpu, or a single Resource.
            Type: Resource. None selects both CPU and GPU.
        :param utilization_ranges: list of (start, end) tuples.
            Defaults to [(90, 100), (10, 90), (0, 10)].
        :return: DataFrame with the "type" and "nodeID" of each group plus one count
            column per utilization range that was observed (missing combinations are
            filled with 0). An empty DataFrame is returned when an argument is invalid,
            and an empty slice of `self.sys_metrics_df` when no sample matches any range.
        """
        if (device is not None) and (not isinstance(device, (list, Resource))):
            get_logger().info(f"{device} should be of type list or Resource")
            return pd.DataFrame()

        if device is None:
            resources = [Resource.CPU.value, Resource.GPU.value]
        else:
            if isinstance(device, Resource):
                device = [device]
            resources = [x.value for x in device]

        if utilization_ranges is None:
            utilization_ranges = [(90, 100), (10, 90), (0, 10)]
        if not isinstance(utilization_ranges, list):
            get_logger().info(
                f"{utilization_ranges} should be a list of tuples containing the ranges"
            )
            return pd.DataFrame()
        if not utilization_ranges:
            get_logger().info(f"{utilization_ranges} cannot be empty")
            return pd.DataFrame()
        if any(len(utilization_range) != 2 for utilization_range in utilization_ranges):
            get_logger().info(
                f"Each interval in {utilization_ranges} must have a start and end value"
            )
            return pd.DataFrame()

        def helper(x, util_ranges):
            # First matching range wins; both bounds are inclusive.
            for start, end in util_ranges:
                if start <= float(x) <= end:
                    return (start, end)
            # Empty tuple marks "no range matched"; such rows are filtered out below.
            return ()

        # Only the "value" column is needed, so map over it directly instead of
        # materialising every row with DataFrame.apply(axis=1).
        self.sys_metrics_df["ranges"] = self.sys_metrics_df["value"].map(
            lambda value: helper(value, utilization_ranges)
        )
        device_sys_df = self.sys_metrics_df[self.sys_metrics_df["ranges"] != ()]

        if device_sys_df.empty:
            return device_sys_df

        # The joined resource names are used as a regex alternation, e.g. "cpu|gpu".
        type_matches = device_sys_df["type"].str.contains("|".join(resources))
        # Series.any(level=0) was removed in pandas 2.0. Grouping on index level 0
        # (unsorted, as the old reduction did) and reducing with any() is the
        # documented replacement and yields the same mask.
        usage_stats = device_sys_df[type_matches.groupby(level=0, sort=False).any()]

        # describe() on the grouped column yields count/unique/top/freq per
        # (type, nodeID, range); only the count is of interest.
        df_grouped = (
            usage_stats.groupby(["type", "nodeID", "ranges"])["ranges"].describe().reset_index()
        )
        df_grouped = df_grouped.drop(["unique", "top", "freq"], axis="columns")
        # Spread the ranges out into columns so each (type, nodeID) becomes one row.
        df_grouped = (
            df_grouped.set_index(["type", "nodeID"]).pivot(columns="ranges")["count"].reset_index()
        )
        # A (type, nodeID) pair with no samples in some range gets 0 rather than NaN.
        df_grouped = df_grouped.fillna(0)
        return df_grouped