Providers/Modules/Plugins/VMInsights/plugin/VMInsightsDataCollector.rb (331 lines of code) (raw):

# frozen_string_literal: true require 'scanf' module VMInsights require_relative 'VMInsightsIDataCollector.rb' class DataCollector < IDataCollector def initialize(log, root_directory_name="/") @log = log @root = root_directory_name @baseline_exception = RuntimeError.new "baseline has not been called" @cpu_count = nil @saved_net_data = nil @saved_disk_data = DiskInventory.new(@log, @root) end def baseline @baseline_exception = nil @cpu_count, is_64_bit = get_cpu_info_baseline DataWithWrappingCounter.set_32_bit(! is_64_bit) @saved_net_data = get_net_data @saved_disk_data.baseline t, i = get_cpu_idle { :total_time => t, :idle => i } end def start_sample end def end_sample end def get_available_memory_kb available = nil total = nil File.open(File.join(@root, "proc", "meminfo"), "rb") { |f| begin line = f.gets next if line.nil? line.scanf("%s%d%s") { |label, value, uom| if (label == "MemTotal:" && value >= 0 && uom == "kB") total = value elsif (label == "MemAvailable:" && value >= 0 && uom == "kB") available = value end } end until f.eof? } raise IDataCollector::Unavailable, "Available memory not found" if available.nil? raise IDataCollector::Unavailable, "Total memory not found" if total.nil? return available, total end # returns: cummulative total time, cummulative idle time # /proc/stat contains system statistics since last restart # first line contains aggregate across all CPUs # format: # # cpu user nice system idle iowait irq softirq steal guest guest_nice # eg: cpu 2904083 315778 1613190 140077550 216726 0 88355 0 0 0 # https://www.kernel.org/doc/html/latest/filesystems/proc.html#miscellaneous-kernel-statistics-in-proc-stat # Above values are stored as u64 counters measuring in nanoseconds # For 256 cpu's this means rollover occurs in approx 833 days (worst case) # https://elixir.bootlin.com/linux/latest/source/fs/proc/stat.c#L111 # https://elixir.bootlin.com/linux/v4.18/source/fs/proc/stat.c#L120 def get_cpu_idle total_time = nil idle = nil File.open(File.join(@root, "proc", "stat"), "rb") { |f| line = f.gets raise Unavailable, "/proc/stat empty" if line.nil? time_entries = line.split(" ") # cpu user nice system idle - remaining entries depend on kernel version raise Unavailable, "/proc/stat: first entry not cpu" if time_entries[0] != "cpu" raise Unavailable, "/proc/stat insufficient entries" if time_entries.length < 5 time_entries = time_entries.slice(1, time_entries.length) # skip the first entry in row: "cpu" # last six entries are kernel version dependent so pad with 6 0 values time_entries.push("0", "0", "0", "0", "0", "0") idle = time_entries[3].to_i + time_entries[4].to_i total_time = time_entries.map(&:to_i).sum } return total_time, idle end # returns: # number of CPUs available for scheduling tasks # raises: # Unavailable if not available def get_number_of_cpus raise @baseline_exception if @baseline_exception raise @cpu_count if @cpu_count.kind_of? StandardError @cpu_count end # return: # An array of objects with methods: # mount_point # size_in_bytes # free_space_in_bytes # device_name def get_filesystems result = [] df = File.join(@root, "bin", "df") IO.popen([df, "--block-size=1", "-T"], { :in => :close, :err => File::NULL }) { |io| while (line = io.gets) a = line.split(" ") if (a[1] =~ /^(ext[234]|xfs)$/) begin result << Fs.new(a[0], a[6], a[2], a[4]) if a.size == 7 rescue ArgumentError => ex # malformed input @log.debug() { "#{__method__}: #{ex}: '#{line}'" } end end end } result rescue => ex raise IDataCollector::Unavailable.new ex.message end # returns: # An array of objects with methods: # device # bytes_received since last call or baseline # bytes_sent since last call or baseline # Note: Only devices that are "UP" or had activity are included def get_net_stats raise @baseline_exception if @baseline_exception result = [] new_data = get_net_data new_data.each_pair { |key, new_value| previous_value = @saved_net_data[key] if previous_value.nil? result << new_value if new_value.up else diff = new_value - previous_value result << diff if (new_value.up || diff.active?) end } @saved_net_data = new_data result end def get_disk_stats(dev) raise @baseline_exception if @baseline_exception @saved_disk_data.get_disk_stats(dev) end private class DataWithWrappingCounter @@counter_modulus = 0 # default to cause exception def self.set_32_bit(is32bit) @@counter_modulus = (2 ** (is32bit ? 32 : 64)) end protected def sub_with_wrap(a, b) (@@counter_modulus + a - b) % @@counter_modulus end end class DiskInventory def initialize(log, root) @log = log @root = root @sector_sizes = Hash.new { |h, k| h[k] = get_sector_size(k) } @saved_disk_data = { } end def baseline @sector_sizes.clear() @sector_sizes.merge!(get_sector_sizes) @saved_disk_data = { } @sector_sizes.each_pair { |d, s| begin @saved_disk_data[d] = get_disk_data(d, s) rescue IDataCollector::Unavailable => ex # NOP end } end def get_disk_stats(dev) current = get_disk_data dev, @sector_sizes[dev] raise IDataCollector::Unavailable, "no data for #{dev}" if current.nil? previous = @saved_disk_data[dev] @saved_disk_data[dev] = current raise IDataCollector::Unavailable, "no previous data for #{dev}" if previous.nil? current - previous end private def get_sector_size(dev) raise ArgumentError, "dev is nil" if dev.nil? data = get_sector_sizes data[dev] end def get_sector_sizes() cmd = [ File.join(@root, "bin", "lsblk"), "-sd", "-oNAME,LOG-SEC" ] result = { } begin IO.popen(cmd, { :in => :close }) { |io| io.gets # skips the header while (line = io.gets) s = line.split(" ") next if s.length < 2 result[s[0]] = s[1].to_i end } rescue => ex @log.debug() { "#{__method__}: #{ex}" } end result end def get_disk_data(dev, sector_size) path = File.join(@root, "sys", "class", "block", dev, "stat") begin File.open(path, "rb") { |f| line = f.gets raise Unavailable, "#{path}: is empty" if line.nil? data = line.split(" ") RawDiskData.new( dev, Time.now, data[0].to_i, data[2].to_i, data[4].to_i, data[6].to_i, sector_size ) } rescue Errno::ENOENT => ex raise IDataCollector::Unavailable, "#{path}: #{ex}" end end class DiskData def initialize(d, t, r, rb, w, wb) @device = -d @delta_time = t @reads = r @bytes_read = rb @writes = w @bytes_written = wb end attr_reader :device, :reads, :bytes_read, :writes, :bytes_written, :delta_time end class RawDiskData < DataWithWrappingCounter def initialize(d, t, r, rs, w, ws, ss) @device = -d @time = t @reads = r @read_sectors = rs @writes = w @write_sectors = ws @sector_size = ss end attr_reader :device, :time, :reads, :read_sectors, :writes, :write_sectors def -(other) raise ArgumentError, "#{device} != #{other.device}" unless device == other.device delta_t = (time - other.time) DiskData.new( device, delta_t, sub_with_wrap(reads, other.reads), @sector_size.nil? ? nil : (sub_with_wrap(read_sectors, other.read_sectors) * @sector_size), sub_with_wrap(writes, other.writes), @sector_size.nil? ? nil : (sub_with_wrap(write_sectors, other.write_sectors) * @sector_size) ) end end end class NetData def initialize(d, t, r, s) @device = -d @delta_time = t @bytes_received = r @bytes_sent = s end def active? (@bytes_received > 0) || (@bytes_sent > 0) end attr_reader :device, :delta_time, :bytes_received, :bytes_sent end class RawNetData < DataWithWrappingCounter def initialize(d, t, u, r, s) @time = t @device = -d @bytes_received = r @bytes_sent = s @up = u end attr_reader :up def -(other) NetData.new @device, @time - other.time, sub_with_wrap(@bytes_received, other.bytes_received), sub_with_wrap(@bytes_sent, other.bytes_sent) end attr_reader :device, :time, :bytes_received, :bytes_sent end def get_net_data sys_devices_virtual_net = File.join(@root, "sys", "devices", "virtual", "net") devices_up = get_up_net_devices result = { } File.open(File.join(@root, "proc", "net", "dev"), "rb") { |f| now = Time.now while (line = f.gets) line = line.split(" ") next if line.empty? dev = line[0] next unless ((0...10).include? dev.length) && (dev.end_with? ":") dev.chop! next if Dir.exist? File.join(sys_devices_virtual_net, dev) result[dev] = RawNetData.new(dev, now, devices_up[dev], line[1].to_i, line[9].to_i) end } result end def get_up_net_devices result = Hash.new(false) begin File.open(File.join(@root, "proc", "net", "route")) { |f| f.gets # skip the header while (line = f.gets) dev = line.partition(/\t+/)[0] result[dev] = true unless dev.empty? end } rescue => ex @log.debug() { "#{__method__}: #{ex}" } end result end class Fs def initialize(device_name, mount_point, size_in_bytes, free_space_in_bytes) raise ArgumentError, mount_point unless mount_point.start_with? "/" raise ArgumentError, device_name unless device_name.start_with?("/dev/") device_name = device_name.sub(/^\/dev\//, '') @device_name = device_name @mount_point = mount_point @size_in_bytes = Integer(size_in_bytes, 10) raise ArgumentError, size_in_bytes if (@size_in_bytes == 0) @free_space_in_bytes = Integer(free_space_in_bytes, 10) end def <=>(o) r = device_name <=> o.device_name return r unless r.zero? r = mount_point <=> o.mount_point return r unless r.zero? r = size_in_bytes <=> o.size_in_bytes return r unless r.zero? free_space_in_bytes <=> o.free_space_in_bytes end attr_reader :device_name, :mount_point, :size_in_bytes, :free_space_in_bytes alias_method :to_s, :inspect end def get_cpu_info_baseline lscpu = File.join(@root, "usr", "bin", "lscpu") count = 0 IO.popen([lscpu, "-p" ], { :in => :close, :err => File::NULL }) { |io| while (line = io.gets) count += 1 if ('0' .. '9').member?(line[0]) end } count = Unavailable.new "No CPUs found" if count.zero? is_64_bit = false IO.popen({"LC_ALL" => "C"}, lscpu, { :in => :close, :err => File::NULL }) { |io| while (line = io.gets) if line.start_with? "CPU op-mode(s):" is_64_bit = (line.include? "64-bit") break end end } return count, is_64_bit rescue => ex return (Unavailable.new ex.message), true end end # DataCollector end #module