azurelinuxagent/ga/cgroupcontroller.py (98 lines of code) (raw):
# Copyright 2018 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+
import errno
import os
from datetime import timedelta
from azurelinuxagent.common import logger, conf
from azurelinuxagent.common.exception import CGroupsException
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.utils import fileutil
# One-hour interval; judging by its name it is meant to be used as a report period. It is not referenced
# by the rest of this module.
_REPORT_EVERY_HOUR = timedelta(hours=1)
# Default value of MetricValue's report_period: the configured cgroup check period, interpreted as seconds.
# The configuration is read once, when this module is imported.
_DEFAULT_REPORT_PERIOD = timedelta(seconds=conf.get_cgroup_check_period())
AGENT_NAME_TELEMETRY = "walinuxagent.service" # Name used for telemetry; it needs to be consistent even if the name of the service changes
# NOTE(review): presumably the name under which the log collector's cgroup is tracked -- confirm against the callers.
AGENT_LOG_COLLECTOR = "azure-walinuxagent-logcollector"
class CounterNotFound(Exception):
    """
    Exception type for a counter that could not be found.

    It adds nothing to Exception; it only exists so that this condition can be caught by type.
    NOTE(review): it is not raised within this module -- presumably the concrete controllers raise it.
    """
class MetricValue(object):
    """
    Holds all the fields required to send a metric as telemetry.

    The values given to the constructor are stored in private attributes and exposed through
    read-only properties (no setters are defined).
    """

    def __init__(self, category, counter, instance, value, report_period=_DEFAULT_REPORT_PERIOD):
        """
        :param category: Category of the metric (presumably one of the MetricsCategory names)
        :param counter: Counter of the metric (presumably one of the MetricsCounter names)
        :param instance: Instance the metric refers to
        :param value: Value of the metric
        :param report_period: Report period of the metric; defaults to _DEFAULT_REPORT_PERIOD
        """
        self._category, self._counter, self._instance = category, counter, instance
        self._value, self._report_period = value, report_period

    @property
    def category(self):
        """Category given to the constructor."""
        return self._category

    @property
    def counter(self):
        """Counter given to the constructor."""
        return self._counter

    @property
    def instance(self):
        """Instance given to the constructor."""
        return self._instance

    @property
    def value(self):
        """Value given to the constructor."""
        return self._value

    @property
    def report_period(self):
        """Report period given to the constructor (or the default one)."""
        return self._report_period
class MetricsCategory(object):
    """
    Names of the categories a metric can belong to.
    """
    MEMORY_CATEGORY = "Memory"
    CPU_CATEGORY = "CPU"
class MetricsCounter(object):
    """
    Names of the counters used for metrics.

    Where a counter has a unit, the unit is part of the name itself, in parentheses.
    """
    # These strings are the counter names themselves; keep them unchanged.
    PROCESSOR_PERCENT_TIME = "% Processor Time"
    THROTTLED_TIME = "Throttled Time (s)"
    TOTAL_MEM_USAGE = "Total Memory Usage (B)"
    ANON_MEM_USAGE = "Anon Memory Usage (B)"
    CACHE_MEM_USAGE = "Cache Memory Usage (B)"
    MAX_MEM_USAGE = "Max Memory Usage (B)"
    SWAP_MEM_USAGE = "Swap Memory Usage (B)"
    MEM_THROTTLED = "Total Memory Throttled Events"
    AVAILABLE_MEM = "Available Memory (MB)"
    USED_MEM = "Used Memory (MB)"
class _CgroupController(object):
    """
    Base class for a controller of a cgroup, identified by the name of the cgroup and the path of the controller.

    It provides helpers to read the files under the controller's path and to check whether any process belongs
    to the cgroup. get_tracked_metrics(), get_unit_properties() and get_controller_type() raise
    NotImplementedError here; concrete controllers are expected to override them.
    """

    def __init__(self, name, cgroup_path):
        """
        Initialize data collection for the controller
        :param: name: Name of the CGroup
        :param: cgroup_path: Path of the controller
        :return:
        """
        self.name = name
        self.path = cgroup_path

    def __str__(self):
        return "{0} [{1}]".format(self.name, self.path)

    def _get_cgroup_file(self, file_name):
        """
        Returns the full path of the given file within the controller's path.
        """
        return os.path.join(self.path, file_name)

    def _get_file_contents(self, file_name):
        """
        Retrieve the contents of file.

        :param str file_name: Name of file within that metric controller
        :return: Entire contents of the file
        :rtype: str
        """
        parameter_file = self._get_cgroup_file(file_name)
        return fileutil.read_file(parameter_file)

    def _get_parameters(self, parameter_name, first_line_only=False):
        """
        Retrieve the values of a parameter from a controller.

        :param str parameter_name: Name of file within that metric controller
        :param bool first_line_only: return only the first line.
        :return: If first_line_only is True, the first line of the file, without line terminator; otherwise a
                 list with all the lines of the file, without line terminators (empty if the file is empty)
        :rtype: str or [str]
        :raises IOError, OSError: if the file does not exist (errno.ENOENT); the original exception is re-raised
                 as is, so that callers can tell a missing file apart from other failures
        :raises CGroupsException: if the first line is requested but the file is empty, or on any other error
                 while reading the file
        """
        try:
            values = self._get_file_contents(parameter_name).splitlines()
            return values[0] if first_line_only else values
        except IndexError:
            # values[0] failed: the first line was requested, but the file has no lines
            parameter_filename = self._get_cgroup_file(parameter_name)
            logger.error("File {0} is empty but should not be".format(parameter_filename))
            raise CGroupsException("File {0} is empty but should not be".format(parameter_filename))
        except Exception as e:
            if isinstance(e, (IOError, OSError)) and e.errno == errno.ENOENT:  # pylint: disable=E1101
                # a bare raise re-raises the active exception and keeps its original traceback
                raise
            parameter_filename = self._get_cgroup_file(parameter_name)
            raise CGroupsException("Exception while attempting to read {0}".format(parameter_filename), e)

    def _warn_cannot_read_procs(self, error):
        """
        Emits the (rate limited) warning used by is_active() when "cgroup.procs" cannot be read.
        """
        logger.periodic_warn(logger.EVERY_HALF_HOUR,
                             'Could not get list of procs from "cgroup.procs" file in the cgroup: {0}.'
                             ' Internal error: {1}'.format(self.path, ustr(error)))

    def is_active(self):
        """
        Returns True if any processes belong to the cgroup. In v1, cgroup.procs returns a list of the thread group IDs
        belong to the cgroup. In v2, cgroup.procs returns a list of the process IDs belonging to the cgroup.

        Returns False if cgroup.procs is empty or cannot be read; failures other than a missing file are reported
        with a periodic warning.
        """
        try:
            return bool(self._get_parameters("cgroup.procs"))
        except (IOError, OSError) as e:
            # only suppressing file not found exceptions.
            if e.errno != errno.ENOENT:
                self._warn_cannot_read_procs(e)
        except CGroupsException as e:
            self._warn_cannot_read_procs(e)
        return False

    def get_tracked_metrics(self):
        """
        Retrieves the current value of the metrics tracked for this controller/cgroup and returns them as an array.
        """
        raise NotImplementedError()

    def get_unit_properties(self):
        """
        Returns a list of the unit properties to collect for the controller.
        """
        raise NotImplementedError()

    def get_controller_type(self):
        """
        Returns the type of the controller. Example: CPU, Memory, etc.
        """
        raise NotImplementedError()