metric/cpu/cpu.go (154 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Elasticsearch B.V. licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package cpu import ( "errors" "fmt" "github.com/elastic/elastic-agent-libs/mapstr" "github.com/elastic/elastic-agent-libs/opt" "github.com/elastic/elastic-agent-system-metrics/metric" ) // CPU manages the CPU metrics from /proc/stat // If a given metric isn't available on a given platform, // The value will be null. All methods that use these fields // should assume that any value can be null. // The values are in "ticks", which translates to milliseconds of CPU time type CPU struct { User opt.Uint `struct:"user,omitempty"` Sys opt.Uint `struct:"system,omitempty"` Idle opt.Uint `struct:"idle,omitempty"` Nice opt.Uint `struct:"nice,omitempty"` // Linux, Darwin, BSD Irq opt.Uint `struct:"irq,omitempty"` // Linux and openbsd Wait opt.Uint `struct:"iowait,omitempty"` // Linux and AIX SoftIrq opt.Uint `struct:"softirq,omitempty"` // Linux only Stolen opt.Uint `struct:"steal,omitempty"` // Linux only } // MetricOpts defines the fields that are passed along to the formatted output type MetricOpts struct { Ticks bool Percentages bool NormalizedPercentages bool } // CPUInfo manages the CPU information from /proc/cpuinfo // If a given value isn't available on a given platformn // the value will be the type's zero-value type CPUInfo struct { ModelName string ModelNumber string Mhz float64 PhysicalID int CoreID int } // CPUMetrics carries global and per-core CPU metrics type CPUMetrics struct { totals CPU // list carries the same data, broken down by CPU list []CPU // CPUInfo carries some data from /proc/cpuinfo CPUInfo []CPUInfo } // Total returns the total CPU time in ticks as scraped by the API func (cpu CPU) Total() uint64 { // it's generally safe to blindly sum these up, // As we're just trying to get a total of all CPU time. return opt.SumOptUint(cpu.User, cpu.Nice, cpu.Sys, cpu.Idle, cpu.Wait, cpu.Irq, cpu.SoftIrq, cpu.Stolen) } type option struct { usePerformanceCounter bool } type OptionFunc func(*option) // Note: WithWindowsPerformanceCounter option is only effective for windows and is ineffective if used by other OS'. func WithWindowsPerformanceCounter() OptionFunc { return func(o *option) { o.usePerformanceCounter = true } } // Fetch collects a new sample of the CPU usage metrics. // This will overwrite the currently stored samples. func (m *Monitor) Fetch() (Metrics, error) { metric, err := Get(m) if err != nil { return Metrics{}, fmt.Errorf("error fetching CPU metrics: %w", err) } oldLastSample := m.lastSample m.lastSample = metric return Metrics{previousSample: oldLastSample.totals, currentSample: metric.totals, count: len(metric.list), isTotals: true}, nil } // FetchCores collects a new sample of CPU usage metrics per-core // This will overwrite the currently stored samples. func (m *Monitor) FetchCores() ([]Metrics, error) { metric, err := Get(m) if err != nil { return nil, fmt.Errorf("error fetching CPU metrics: %w", err) } coreMetrics := make([]Metrics, len(metric.list)) for i := 0; i < len(metric.list); i++ { lastMetric := CPU{} // Count of CPUs can change if len(m.lastSample.list) > i { lastMetric = m.lastSample.list[i] } coreMetrics[i] = Metrics{ currentSample: metric.list[i], previousSample: lastMetric, isTotals: false, } // Only add CPUInfo metric if it's available // Remove this if statement once CPUInfo is supported // by all systems if len(metric.CPUInfo) != 0 { coreMetrics[i].cpuInfo = metric.CPUInfo[i] } } m.lastSample = metric return coreMetrics, nil } // Metrics stores the current and the last sample collected by a Beat. type Metrics struct { previousSample CPU currentSample CPU count int cpuInfo CPUInfo isTotals bool } // Format returns the final MapStr data object for the metrics. func (metric Metrics) Format(opts MetricOpts) (mapstr.M, error) { timeDelta := metric.currentSample.Total() - metric.previousSample.Total() if timeDelta <= 0 { return nil, errors.New("previous sample is newer than current sample") } normCPU := metric.count if !metric.isTotals { normCPU = 1 } formattedMetrics := mapstr.M{} reportOptMetric := func(name string, current, previous opt.Uint, norm int) { if !current.IsZero() { formattedMetrics[name] = fillMetric(opts, current, previous, timeDelta, norm) } } if opts.Percentages { _, _ = formattedMetrics.Put("total.pct", createTotal(metric.previousSample, metric.currentSample, timeDelta, normCPU)) } if opts.NormalizedPercentages { _, _ = formattedMetrics.Put("total.norm.pct", createTotal(metric.previousSample, metric.currentSample, timeDelta, 1)) } // /proc/stat metrics reportOptMetric("user", metric.currentSample.User, metric.previousSample.User, normCPU) reportOptMetric("system", metric.currentSample.Sys, metric.previousSample.Sys, normCPU) reportOptMetric("idle", metric.currentSample.Idle, metric.previousSample.Idle, normCPU) reportOptMetric("nice", metric.currentSample.Nice, metric.previousSample.Nice, normCPU) reportOptMetric("irq", metric.currentSample.Irq, metric.previousSample.Irq, normCPU) reportOptMetric("iowait", metric.currentSample.Wait, metric.previousSample.Wait, normCPU) reportOptMetric("softirq", metric.currentSample.SoftIrq, metric.previousSample.SoftIrq, normCPU) reportOptMetric("steal", metric.currentSample.Stolen, metric.previousSample.Stolen, normCPU) // Only add CPU info metrics if we're returning information by core // (isTotals is false) if !metric.isTotals { // Some platforms do not report those metrics, so metric.cpuInfo // is empty, if that happens we do not add the empty metrics to the // final event. if metric.cpuInfo != (CPUInfo{}) { // /proc/cpuinfo metrics formattedMetrics["model_number"] = metric.cpuInfo.ModelNumber formattedMetrics["model_name"] = metric.cpuInfo.ModelName formattedMetrics["mhz"] = metric.cpuInfo.Mhz formattedMetrics["core_id"] = metric.cpuInfo.CoreID formattedMetrics["physical_id"] = metric.cpuInfo.PhysicalID } } return formattedMetrics, nil } func createTotal(prev, cur CPU, timeDelta uint64, numCPU int) float64 { idleTime := cpuMetricTimeDelta(prev.Idle, cur.Idle, timeDelta, numCPU) // Subtract wait time from total // Wait time is not counted from the total as per #7627. if !cur.Wait.IsZero() { idleTime = idleTime + cpuMetricTimeDelta(prev.Wait, cur.Wait, timeDelta, numCPU) } return metric.Round(float64(numCPU) - idleTime) } func fillMetric(opts MetricOpts, cur, prev opt.Uint, timeDelta uint64, numCPU int) mapstr.M { event := mapstr.M{} if opts.Ticks { _, _ = event.Put("ticks", cur.ValueOr(0)) } if opts.Percentages { _, _ = event.Put("pct", cpuMetricTimeDelta(prev, cur, timeDelta, numCPU)) } if opts.NormalizedPercentages { _, _ = event.Put("norm.pct", cpuMetricTimeDelta(prev, cur, timeDelta, 1)) } return event } // CPUCount returns the count of CPUs. When available, use this instead of runtime.NumCPU() func (metric *Metrics) CPUCount() int { return metric.count } // cpuMetricTimeDelta is a helper used by fillTicks to calculate the delta between two CPU tick values func cpuMetricTimeDelta(prev, current opt.Uint, timeDelta uint64, numCPU int) float64 { cpuDelta := int64(current.ValueOr(0) - prev.ValueOr(0)) pct := float64(cpuDelta) / float64(timeDelta) return metric.Round(pct * float64(numCPU)) }