elasticapm/metrics/sets/cpu_linux.py (196 lines of code) (raw):

# BSD 3-Clause License # # Copyright (c) 2019, Elasticsearch BV # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # * Neither the name of the copyright holder nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import logging import os import re import resource import threading from elasticapm.metrics.base_metrics import MetricSet SYS_STATS = "/proc/stat" MEM_STATS = "/proc/meminfo" PROC_STATS = "/proc/self/stat" CGROUP1_MEMORY_LIMIT = "memory.limit_in_bytes" CGROUP1_MEMORY_USAGE = "memory.usage_in_bytes" CGROUP1_MEMORY_STAT = "memory.stat" CGROUP2_MEMORY_LIMIT = "memory.max" CGROUP2_MEMORY_USAGE = "memory.current" CGROUP2_MEMORY_STAT = "memory.stat" UNLIMITED = 0x7FFFFFFFFFFFF000 PROC_SELF_CGROUP = "/proc/self/cgroup" PROC_SELF_MOUNTINFO = "/proc/self/mountinfo" SYS_FS_CGROUP = "/sys/fs/cgroup" CPU_FIELDS = ("user", "nice", "system", "idle", "iowait", "irq", "softirq", "steal", "guest", "guest_nice") MEM_FIELDS = ("MemTotal", "MemAvailable", "MemFree", "Buffers", "Cached") whitespace_re = re.compile(r"\s+") MEMORY_CGROUP = re.compile(r"^\d+:memory:.*") CGROUP_V1_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup.*memory.*") CGROUP_V2_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup2.*cgroup.*") if not os.path.exists(SYS_STATS): raise ImportError("This metric set is only available on Linux") logger = logging.getLogger("elasticapm.metrics.cpu_linux") class CGroupFiles(object): def __init__(self, limit, usage, stat) -> None: self.limit = limit if os.access(limit, os.R_OK) else None self.usage = usage if os.access(usage, os.R_OK) else None self.stat = stat if os.access(stat, os.R_OK) else None class CPUMetricSet(MetricSet): def __init__( self, registry, sys_stats_file=SYS_STATS, process_stats_file=PROC_STATS, memory_stats_file=MEM_STATS, proc_self_cgroup=PROC_SELF_CGROUP, mount_info=PROC_SELF_MOUNTINFO, ) -> None: self.page_size = resource.getpagesize() self.previous = {} self._read_data_lock = threading.Lock() self.sys_stats_file = sys_stats_file self.process_stats_file = process_stats_file self.memory_stats_file = memory_stats_file self._sys_clock_ticks = os.sysconf("SC_CLK_TCK") with self._read_data_lock: try: self.cgroup_files = self.get_cgroup_file_paths(proc_self_cgroup, mount_info) except Exception: logger.debug("Reading/Parsing of cgroup memory files failed, skipping cgroup metrics", exc_info=True) self.previous.update(self.read_process_stats()) self.previous.update(self.read_system_stats()) super(CPUMetricSet, self).__init__(registry) def get_cgroup_file_paths(self, proc_self_cgroup, mount_info): """ Try and find the paths for CGROUP memory limit files, first trying to find the root path in /proc/self/mountinfo, then falling back to the default location /sys/fs/cgroup :param proc_self_cgroup: path to "self" cgroup file, usually /proc/self/cgroup :param mount_info: path to "mountinfo" file, usually proc/self/mountinfo :return: a 3-tuple of memory info files, or None """ line_cgroup = None try: with open(proc_self_cgroup, "r") as proc_self_cgroup_file: for line in proc_self_cgroup_file: if line_cgroup is None and line.startswith("0:"): line_cgroup = line if MEMORY_CGROUP.match(line): line_cgroup = line break except IOError: logger.debug("Cannot read %s, skipping cgroup metrics", proc_self_cgroup, exc_info=True) return if line_cgroup is None: return try: with open(mount_info, "r") as mount_info_file: for line in mount_info_file: # cgroup v2 matcher = CGROUP_V2_MOUNT_POINT.match(line) if matcher is not None: files = self._get_cgroup_v2_file_paths(line_cgroup, matcher.group(1)) if files: return files # cgroup v1 matcher = CGROUP_V1_MOUNT_POINT.match(line) if matcher is not None: files = self._get_cgroup_v1_file_paths(matcher.group(1)) if files: return files except IOError: logger.debug("Cannot read %s, skipping cgroup metrics", mount_info, exc_info=True) return # discovery of cgroup path failed, try with default path files = self._get_cgroup_v2_file_paths(line_cgroup, SYS_FS_CGROUP) if files: return files files = self._get_cgroup_v1_file_paths(os.path.join(SYS_FS_CGROUP, "memory")) if files: return files logger.debug("Location of cgroup files failed, skipping cgroup metrics") def _get_cgroup_v2_file_paths(self, line_cgroup, mount_discovered): line_split = line_cgroup.strip().split(":") slice_path = line_split[-1][1:] try: with open(os.path.join(mount_discovered, slice_path, CGROUP2_MEMORY_LIMIT), "r") as memfile: line_mem = memfile.readline().strip() if line_mem != "max": return CGroupFiles( os.path.join(mount_discovered, slice_path, CGROUP2_MEMORY_LIMIT), os.path.join(mount_discovered, slice_path, CGROUP2_MEMORY_USAGE), os.path.join(mount_discovered, slice_path, CGROUP2_MEMORY_STAT), ) except IOError: pass def _get_cgroup_v1_file_paths(self, mount_discovered): try: with open(os.path.join(mount_discovered, CGROUP1_MEMORY_LIMIT), "r") as memfile: mem_max = int(memfile.readline().strip()) if mem_max < UNLIMITED: return CGroupFiles( os.path.join(mount_discovered, CGROUP1_MEMORY_LIMIT), os.path.join(mount_discovered, CGROUP1_MEMORY_USAGE), os.path.join(mount_discovered, CGROUP1_MEMORY_STAT), ) except IOError: pass def before_collect(self) -> None: new = self.read_process_stats() new.update(self.read_system_stats()) with self._read_data_lock: prev = self.previous delta = {k: new[k] - prev[k] for k in new.keys()} try: cpu_usage_ratio = delta["cpu_usage"] / delta["cpu_total"] except ZeroDivisionError: cpu_usage_ratio = 0 self.gauge("system.cpu.total.norm.pct").val = cpu_usage_ratio # MemAvailable not present in linux before kernel 3.14 # fallback to MemFree + Buffers + Cache if not present - see #500 if "MemAvailable" in new: mem_free = new["MemAvailable"] else: mem_free = sum(new.get(mem_field, 0) for mem_field in ("MemFree", "Buffers", "Cached")) self.gauge("system.memory.actual.free").val = mem_free self.gauge("system.memory.total").val = new["MemTotal"] if "cgroup_mem_total" in new: self.gauge("system.process.cgroup.memory.mem.limit.bytes").val = new["cgroup_mem_total"] if "cgroup_mem_used" in new: self.gauge("system.process.cgroup.memory.mem.usage.bytes").val = new["cgroup_mem_used"] try: cpu_process_percent = delta["proc_total_time"] / delta["cpu_total"] except ZeroDivisionError: cpu_process_percent = 0 self.gauge("system.process.cpu.total.norm.pct").val = cpu_process_percent self.gauge("system.process.memory.size").val = new["vsize"] self.gauge("system.process.memory.rss.bytes").val = new["rss"] * self.page_size self.previous = new def read_system_stats(self): stats = {} with open(self.sys_stats_file, "r") as pidfile: for line in pidfile: if line.startswith("cpu "): fields = whitespace_re.split(line)[1:-1] num_fields = len(fields) # Not all fields are available on all platforms (e.g. RHEL 6 does not provide steal, guest, and # guest_nice. If a field is missing, we default to 0 f = {field: int(fields[i]) if i < num_fields else 0 for i, field in enumerate(CPU_FIELDS)} stats["cpu_total"] = float( f["user"] + f["nice"] + f["system"] + f["idle"] + f["iowait"] + f["irq"] + f["softirq"] + f["steal"] ) stats["cpu_usage"] = stats["cpu_total"] - (f["idle"] + f["iowait"]) break if self.cgroup_files: if self.cgroup_files.limit: with open(self.cgroup_files.limit, "r") as memfile: stats["cgroup_mem_total"] = int(memfile.readline()) if self.cgroup_files.usage: with open(self.cgroup_files.usage, "r") as memfile: usage = int(memfile.readline()) stats["cgroup_mem_used"] = usage with open(self.memory_stats_file, "r") as memfile: for line in memfile: metric_name = line.split(":")[0] if metric_name in MEM_FIELDS: value_in_bytes = int(whitespace_re.split(line)[1]) * 1024 stats[metric_name] = value_in_bytes return stats def read_process_stats(self): stats = {} with open(self.process_stats_file, "r") as pidfile: data = pidfile.readline().split(" ") stats["utime"] = int(data[13]) stats["stime"] = int(data[14]) stats["proc_total_time"] = stats["utime"] + stats["stime"] stats["vsize"] = int(data[22]) stats["rss"] = int(data[23]) return stats