azurelinuxagent/ga/cpucontroller.py (148 lines of code) (raw):

# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+

import errno
import os
import re

from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.ga.cgroupcontroller import _CgroupController, MetricValue, MetricsCategory, MetricsCounter

# Matches the full contents of a v1 cpuacct.stat file; groups are the user and system values.
re_v1_user_system_times = re.compile(r'user (\d+)\nsystem (\d+)\n')
# Matches the contents of a v2 cpu.stat file; the group is the value of usage_usec.
re_v2_usage_time = re.compile(r'[\s\S]*usage_usec (\d+)[\s\S]*')


class _CpuController(_CgroupController):
    """
    Base class for the cpu controllers. It keeps the previous and current samples of the cgroup's CPU usage,
    the system's CPU usage and the throttled time; CpuControllerV1 and CpuControllerV2 implement the sampling.
    """
    def __init__(self, name, cgroup_path):
        super(_CpuController, self).__init__(name, cgroup_path)

        self._osutil = get_osutil()
        # All samples start as None; the "current" samples are set by initialize_cpu_usage()
        self._previous_cgroup_cpu = None
        self._previous_system_cpu = None
        self._current_cgroup_cpu = None
        self._current_system_cpu = None
        self._previous_throttled_time = None
        self._current_throttled_time = None

    def _get_cpu_stat_counter(self, counter_name):
        """
        Gets the value for the provided counter in cpu.stat

        Returns the counter as an int, or 0 if cpu.stat does not exist. Raises CGroupsException if the file
        cannot be read or if it does not contain the counter.
        """
        try:
            with open(os.path.join(self.path, 'cpu.stat')) as cpu_stat:
                #
                # Sample file v1:
                #   # cat cpu.stat
                #   nr_periods  51660
                #   nr_throttled 19461
                #   throttled_time 1529590856339
                #
                # Sample file v2
                #   # cat cpu.stat
                #   usage_usec 200161503
                #   user_usec 199388368
                #   system_usec 773134
                #   core_sched.force_idle_usec 0
                #   nr_periods 40059
                #   nr_throttled 40022
                #   throttled_usec 3565247992
                #   nr_bursts 0
                #   burst_usec 0
                #
                # The counter name is escaped so that it is matched literally (some counters, e.g.
                # core_sched.force_idle_usec, contain regex metacharacters); the pattern is built once
                # rather than once per line.
                counter_pattern = re.compile(r'{0}\s+(\d+)'.format(re.escape(counter_name)))
                for line in cpu_stat:
                    match = counter_pattern.match(line)
                    if match is not None:
                        return int(match.groups()[0])
                # Reported to the caller as a CGroupsException by the generic handler below
                raise Exception("Cannot find {0}".format(counter_name))
        except (IOError, OSError) as e:
            if e.errno == errno.ENOENT:
                return 0
            raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e)))
        except Exception as e:
            raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e)))

    def _cpu_usage_initialized(self):
        """
        Returns True if cpu usage has been initialized, False otherwise.
        """
        return self._current_cgroup_cpu is not None and self._current_system_cpu is not None

    def initialize_cpu_usage(self):
        """
        Sets the initial values of CPU usage. This function must be invoked before calling get_cpu_usage().
        """
        raise NotImplementedError()

    def get_cpu_usage(self):
        """
        Computes the CPU used by the cgroup since the last call to this function.

        The usage is measured as a percentage of utilization of 1 core in the system. For example,
        using 1 core all of the time on a 4-core system would be reported as 100%.

        NOTE: initialize_cpu_usage() must be invoked before calling get_cpu_usage()
        """
        raise NotImplementedError()

    def get_cpu_throttled_time(self, read_previous_throttled_time=True):
        """
        Computes the throttled time (in seconds) since the last call to this function.
        NOTE: initialize_cpu_usage() must be invoked before calling this function
        Compute only current throttled time if read_previous_throttled_time set to False
        """
        raise NotImplementedError()

    def get_tracked_metrics(self):
        """
        Samples the CPU usage and the throttled time and returns them as a list of MetricValue.
        """
        # Note: If the current cpu usage is less than the previous usage (metric is negative), then an empty array will
        # be returned and the agent won't track the metrics.
        tracked = []
        cpu_usage = self.get_cpu_usage()
        if cpu_usage >= float(0):
            tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.PROCESSOR_PERCENT_TIME, self.name, cpu_usage))

        # The throttled time is sampled unconditionally so that the previous/current samples keep advancing
        throttled_time = self.get_cpu_throttled_time()
        if cpu_usage >= float(0) and throttled_time >= float(0):
            tracked.append(MetricValue(MetricsCategory.CPU_CATEGORY, MetricsCounter.THROTTLED_TIME, self.name, throttled_time))

        return tracked

    def get_unit_properties(self):
        """
        Returns the names of the unit properties associated with the cpu controller.
        """
        return ["CPUAccounting", "CPUQuotaPerSecUSec"]

    def get_controller_type(self):
        """
        Returns the type of this controller ("cpu").
        """
        return "cpu"


class CpuControllerV1(_CpuController):
    """
    Cpu controller for cgroups v1: cgroup usage is read from cpuacct.stat and throttled time (in nanoseconds)
    from the throttled_time counter in cpu.stat.
    """
    def initialize_cpu_usage(self):
        """
        Takes the initial samples of cgroup CPU, system CPU and throttled time.
        Raises CGroupsException if the usage has already been initialized.
        """
        if self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() should be invoked only once")
        self._current_cgroup_cpu = self._get_cpu_ticks(allow_no_such_file_or_directory_error=True)
        self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()
        self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_time')

    def _get_cpu_ticks(self, allow_no_such_file_or_directory_error=False):
        """
        Returns the number of USER_HZ of CPU time (user and system) consumed by this cgroup.

        If allow_no_such_file_or_directory_error is set to True and cpuacct.stat does not exist the function returns 0; this is useful when the function can be called before the cgroup has been created.
        """
        try:
            cpuacct_stat = self._get_file_contents('cpuacct.stat')
        except Exception as e:
            if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT:  # pylint: disable=E1101
                raise CGroupsException("Failed to read cpuacct.stat: {0}".format(ustr(e)))
            if not allow_no_such_file_or_directory_error:
                # Re-raise the original "no such file" error; a bare raise keeps its traceback intact
                raise
            cpuacct_stat = None

        cpu_ticks = 0

        if cpuacct_stat is not None:
            #
            # Sample file:
            #     # cat /sys/fs/cgroup/cpuacct/azure.slice/walinuxagent.service/cpuacct.stat
            #     user 10190
            #     system 3160
            #
            match = re_v1_user_system_times.match(cpuacct_stat)
            if not match:
                raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpuacct.stat'), cpuacct_stat))
            cpu_ticks = int(match.groups()[0]) + int(match.groups()[1])

        return cpu_ticks

    def get_cpu_usage(self):
        """
        Returns the CPU used by the cgroup since the previous sample, as a percentage of 1 core
        (rounded to 3 decimals). Raises CGroupsException if initialize_cpu_usage() has not been invoked.
        """
        if not self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()")

        self._previous_cgroup_cpu = self._current_cgroup_cpu
        self._previous_system_cpu = self._current_system_cpu
        self._current_cgroup_cpu = self._get_cpu_ticks()
        self._current_system_cpu = self._osutil.get_total_cpu_ticks_since_boot()

        cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu
        # Use a delta of at least 1 to avoid a division by zero
        system_delta = max(1, self._current_system_cpu - self._previous_system_cpu)

        return round(100.0 * self._osutil.get_processor_cores() * float(cgroup_delta) / float(system_delta), 3)

    def get_cpu_throttled_time(self, read_previous_throttled_time=True):
        """
        Returns the throttled time in seconds: the delta since the previous sample (rounded to 3 decimals), or
        the current value of the counter if read_previous_throttled_time is False (the stored samples are not
        updated in that case).
        """
        # Throttled time is reported in nanoseconds in v1
        if not read_previous_throttled_time:
            return float(self._get_cpu_stat_counter(counter_name='throttled_time') / 1E9)

        if not self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_throttled_time()")

        self._previous_throttled_time = self._current_throttled_time
        self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_time')

        return round(float(self._current_throttled_time - self._previous_throttled_time) / 1E9, 3)


class CpuControllerV2(_CpuController):
    """
    Cpu controller for cgroups v2: cgroup usage (usage_usec) and throttled time (throttled_usec, in microseconds)
    are read from cpu.stat; the system's uptime from /proc/uptime is used as the system usage.
    """
    @staticmethod
    def get_system_uptime():
        """
        Get the uptime of the system (including time spent in suspend) in seconds.
        /proc/uptime contains two numbers (values in seconds): the uptime of the system (including time spent in
        suspend) and the amount of time spent in the idle process:
            # cat /proc/uptime
            365380.48 722644.81

        :return: System uptime in seconds
        :rtype: float
        """
        uptime_contents = fileutil.read_file('/proc/uptime').split()
        return float(uptime_contents[0])

    def _get_system_usage(self):
        """
        Returns the system uptime in seconds; any failure to read or parse /proc/uptime is reported
        as a CGroupsException.
        """
        try:
            return self.get_system_uptime()
        except (OSError, IOError) as e:
            raise CGroupsException("Couldn't read /proc/uptime: {0}".format(ustr(e)))
        except Exception as e:
            raise CGroupsException("Couldn't parse /proc/uptime: {0}".format(ustr(e)))

    def initialize_cpu_usage(self):
        """
        Takes the initial samples of cgroup CPU, system usage and throttled time.
        Raises CGroupsException if the usage has already been initialized.
        """
        if self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() should be invoked only once")
        self._current_cgroup_cpu = self._get_cpu_time(allow_no_such_file_or_directory_error=True)
        self._current_system_cpu = self._get_system_usage()
        self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_usec')

    def _get_cpu_time(self, allow_no_such_file_or_directory_error=False):
        """
        Returns the CPU time (user and system) consumed by this cgroup in seconds.

        If allow_no_such_file_or_directory_error is set to True and cpu.stat does not exist the function returns 0; this is useful when the function can be called before the cgroup has been created.
        """
        try:
            cpu_stat = self._get_file_contents('cpu.stat')
        except Exception as e:
            if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT:  # pylint: disable=E1101
                raise CGroupsException("Failed to read cpu.stat: {0}".format(ustr(e)))
            if not allow_no_such_file_or_directory_error:
                # Re-raise the original "no such file" error; a bare raise keeps its traceback intact
                raise
            cpu_stat = None

        cpu_time = 0

        if cpu_stat is not None:
            #
            # Sample file:
            #     # cat /sys/fs/cgroup/azure.slice/azure-walinuxagent.slice/azure-walinuxagent-logcollector.slice/collect-logs.scope/cpu.stat
            #     usage_usec 1990707
            #     user_usec 1939858
            #     system_usec 50848
            #     core_sched.force_idle_usec 0
            #     nr_periods 397
            #     nr_throttled 397
            #     throttled_usec 37994949
            #     nr_bursts 0
            #     burst_usec 0
            #
            match = re_v2_usage_time.match(cpu_stat)
            if not match:
                raise CGroupsException("The contents of {0} are invalid: {1}".format(self._get_cgroup_file('cpu.stat'), cpu_stat))
            # usage_usec is in microseconds; convert to seconds
            cpu_time = int(match.groups()[0]) / 1E6

        return cpu_time

    def get_cpu_usage(self):
        """
        Returns the CPU used by the cgroup since the previous sample, as a percentage of 1 core
        (rounded to 3 decimals). Raises CGroupsException if initialize_cpu_usage() has not been invoked.
        """
        if not self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_usage()")

        self._previous_cgroup_cpu = self._current_cgroup_cpu
        self._previous_system_cpu = self._current_system_cpu
        self._current_cgroup_cpu = self._get_cpu_time()
        self._current_system_cpu = self._get_system_usage()

        cgroup_delta = self._current_cgroup_cpu - self._previous_cgroup_cpu
        # Use a delta of at least 1 second to avoid a division by zero
        system_delta = max(1.0, self._current_system_cpu - self._previous_system_cpu)

        return round(100.0 * float(cgroup_delta) / float(system_delta), 3)

    def get_cpu_throttled_time(self, read_previous_throttled_time=True):
        """
        Returns the throttled time in seconds: the delta since the previous sample (rounded to 3 decimals), or
        the current value of the counter if read_previous_throttled_time is False (the stored samples are not
        updated in that case).
        """
        # Throttled time is reported in microseconds in v2
        if not read_previous_throttled_time:
            return float(self._get_cpu_stat_counter(counter_name='throttled_usec') / 1E6)

        if not self._cpu_usage_initialized():
            raise CGroupsException("initialize_cpu_usage() must be invoked before the first call to get_cpu_throttled_time()")

        self._previous_throttled_time = self._current_throttled_time
        self._current_throttled_time = self._get_cpu_stat_counter(counter_name='throttled_usec')

        return round(float(self._current_throttled_time - self._previous_throttled_time) / 1E6, 3)