def get_framework_metrics_by_timesteps()

in smdebug/profiler/analysis/utils/profiler_data_to_pandas.py [0:0]


    def get_framework_metrics_by_timesteps(
        self, timestep_list=None, selected_framework_metrics=None
    ):
        """
        Accumulate the duration of framework events that are active at the given timestamps.

        This function is useful when we want to correlate framework metrics with system
        metrics: framework events have a begin and an end timestamp, whereas system metrics
        have only a single timestamp. For every timestamp, each event whose interval strictly
        contains it (``start_time < timestamp < end_time``) contributes its full duration
        (``end_time - start_time``) to the totals.

        :param timestep_list: iterable of timestamps accepted by
            ``self.convert_datetime_to_timestamp``. ``None`` or an empty iterable yields
            three empty dicts.
        :param selected_framework_metrics: collection of event names and/or event phases to
            keep. An event is kept when its name or its phase is in the collection. ``None``
            or an empty collection keeps every event.
        :return: tuple ``(framework_metrics, results_detailed, training_phase)`` of dicts:
            ``framework_metrics`` maps each event phase without "Step" in its name to its
            accumulated duration divided by the largest such duration;
            ``results_detailed`` maps each event name without "Step" in it to its
            accumulated (unnormalized) duration;
            ``training_phase`` maps each event phase with "Step" in its name to its
            accumulated (unnormalized) duration.
        """
        # sorted() always returns a real list, so the emptiness check below is safe even
        # when the caller passes an array-like whose truth value is ambiguous.
        timestep_list = sorted(timestep_list) if timestep_list is not None else []
        if not timestep_list:
            return {}, {}, {}
        if selected_framework_metrics is None:
            selected_framework_metrics = ()
        filter_active = len(selected_framework_metrics) > 0

        # Convert every timestamp exactly once instead of once per chunk.
        timestamps_us = [
            self.convert_datetime_to_timestamp(timestep) for timestep in timestep_list
        ]

        # get min and max search range
        start_time_us = timestamps_us[0]
        end_time_us = timestamps_us[-1]

        # to avoid out of memory issues, we read data in chunks of at most self.interval
        current_time_us = min(start_time_us + self.interval, end_time_us)

        results = {}
        results_detailed = {}
        # Absolute index of the first timestamp that has not been processed yet. It only
        # moves forward, so a timestamp is never matched against more than one chunk.
        counter = 0
        total_timestamps = len(timestamps_us)

        while start_time_us < end_time_us:
            # get all framework metrics from last to current timestamp
            self.framework_metrics_reader.refresh_event_file_list()
            events = self.framework_metrics_reader.get_events(start_time_us, current_time_us)

            # Consume the timestamps that fall before the end of this chunk.
            # NOTE(review): the comparison is strict, and the final chunk ends exactly at
            # the last timestamp, so the last timestamp is never matched against events.
            # This mirrors the previous behavior - confirm whether it is intended.
            while counter < total_timestamps and timestamps_us[counter] < current_time_us:
                timestamp = timestamps_us[counter]
                counter += 1
                for event in events:
                    if (
                        filter_active
                        and event.event_name not in selected_framework_metrics
                        and event.event_phase not in selected_framework_metrics
                    ):
                        continue
                    if event.start_time < timestamp and event.end_time > timestamp:
                        duration = event.end_time - event.start_time
                        results[event.event_phase] = results.get(event.event_phase, 0) + duration
                        if "Step" not in event.event_name:
                            results_detailed[event.event_name] = (
                                results_detailed.get(event.event_name, 0) + duration
                            )

            # read the next chunk of framework metrics
            start_time_us = current_time_us
            current_time_us = min(current_time_us + self.interval, end_time_us)

        # Phases whose name contains "Step" are reported separately from the other phases.
        framework_metrics = {}
        training_phase = {}
        for phase, total in results.items():
            if "Step" in phase:
                training_phase[phase] = total
            else:
                framework_metrics[phase] = total

        # Normalize by the largest total. Every recorded total is positive because an event
        # is only counted when start_time < end_time, so the divisor cannot be zero.
        if framework_metrics:
            max_value = float(max(framework_metrics.values()))
            framework_metrics = {
                phase: total / max_value for phase, total in framework_metrics.items()
            }

        return framework_metrics, results_detailed, training_phase