runtime/runtime_metrics.go (157 lines of code) (raw):

// Copyright (c) 2023 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package zanzibar

import (
	"runtime"
	"sync"
	"time"

	"github.com/uber-go/tally"
)

// _numGCThreshold is the length of the runtime.MemStats PauseEnd array.
// collectGCMetrics uses it as the upper bound on how many GC pause samples
// it replays in a single collection.
const _numGCThreshold = uint32(len(runtime.MemStats{}.PauseEnd))

// RuntimeMetricsOptions configures which groups of runtime metrics are
// collected and how often a collection happens.
type RuntimeMetricsOptions struct {
	// EnableCPUMetrics turns on the gomaxprocs, num-cpu and num-goroutines gauges.
	EnableCPUMetrics bool `json:"enableCPUMetrics"`
	// EnableMemMetrics turns on the heap and stack gauges.
	EnableMemMetrics bool `json:"enableMemMetrics"`
	// EnableGCMetrics turns on the GC cycle counter and the GC pause timer/histogram.
	EnableGCMetrics bool `json:"enableGCMetrics"`
	// CollectInterval is the period of the ticker that drives each collection.
	CollectInterval time.Duration `json:"collectInterval"`
}

// RuntimeMetricsCollector is the lifecycle interface of a runtime metrics
// collector: it can be started, stopped, and queried for its running state.
type RuntimeMetricsCollector interface { Start() Stop() IsRunning() bool } type runtimeMetrics struct { // maximum number of CPUs which are executing simultaneously goMaxProcs tally.Gauge // number of logical CPUs usable by the current process numCPUs tally.Gauge // number of goroutines that currently exist numGoRoutines tally.Gauge // bytes of allocated heap objects heapAlloc tally.Gauge // bytes in idle (unused) spans heapIdle tally.Gauge // bytes in in-use spans heapInuse tally.Gauge // bytes in stack spans stackInuse tally.Gauge // number of completed GC cycles numGC tally.Counter // GC pause time gcPauseMs tally.Timer gcPauseMsHist tally.Histogram } // runtimeCollector keeps the current state of runtime metrics type runtimeCollector struct { opts RuntimeMetricsOptions scope tally.Scope metrics runtimeMetrics runningMutex sync.RWMutex running bool // protected by runningMutex stop chan struct{} lastNumGC uint32 } // StartRuntimeMetricsCollector starts collecting runtime metrics periodically. // Recommended usage: // rm := StartRuntimeMetricsCollector(rootScope.Scope("runtime"), opts) // ... // rm.Stop() func StartRuntimeMetricsCollector( config RuntimeMetricsOptions, scope tally.Scope, ) RuntimeMetricsCollector { if !config.EnableCPUMetrics && !config.EnableMemMetrics && !config.EnableGCMetrics { return nil } rm := NewRuntimeMetricsCollector( config, scope.SubScope("runtime"), ) rm.Start() return rm } // NewRuntimeMetricsCollector creates a new runtime metrics collector. 
func NewRuntimeMetricsCollector( opts RuntimeMetricsOptions, scope tally.Scope, ) RuntimeMetricsCollector { var memstats runtime.MemStats runtime.ReadMemStats(&memstats) return &runtimeCollector{ opts: opts, scope: scope, metrics: runtimeMetrics{ // CPU goMaxProcs: scope.Gauge("gomaxprocs"), numCPUs: scope.Gauge("num-cpu"), numGoRoutines: scope.Gauge("num-goroutines"), // Memory heapAlloc: scope.Gauge("memory.heap"), heapIdle: scope.Gauge("memory.heapidle"), heapInuse: scope.Gauge("memory.heapinuse"), stackInuse: scope.Gauge("memory.stack"), // GC numGC: scope.Counter("memory.num-gc"), gcPauseMs: scope.Timer("memory.gc-pause-ms"), gcPauseMsHist: scope.Histogram("memory.gc-pause-ms-hist", tally.DefaultBuckets), }, running: false, stop: make(chan struct{}), lastNumGC: memstats.NumGC, } } // Start collecting runtime metrics periodically. func (r *runtimeCollector) Start() { r.runningMutex.RLock() if r.running { r.runningMutex.RUnlock() return } r.runningMutex.RUnlock() if r.opts.EnableCPUMetrics || r.opts.EnableMemMetrics || r.opts.EnableGCMetrics { go func() { ticker := time.NewTicker(r.opts.CollectInterval) for { select { case <-ticker.C: r.collect() case <-r.stop: ticker.Stop() return } } }() r.runningMutex.Lock() r.running = true r.runningMutex.Unlock() } } // Stop collecting runtime metrics. It cannot be restarted once stopped. func (r *runtimeCollector) Stop() { r.runningMutex.Lock() defer r.runningMutex.Unlock() close(r.stop) r.running = false } // IsRunning returns true if the runtime metrics collector was running; otherwise false. 
func (r *runtimeCollector) IsRunning() bool { r.runningMutex.RLock() defer r.runningMutex.RUnlock() return r.running } func (r *runtimeCollector) collect() { var memStats runtime.MemStats if r.opts.EnableMemMetrics || r.opts.EnableGCMetrics { runtime.ReadMemStats(&memStats) } if r.opts.EnableCPUMetrics { r.collectCPUMetrics() } if r.opts.EnableMemMetrics { r.collectMemMetrics(&memStats) } if r.opts.EnableGCMetrics { r.collectGCMetrics(&memStats) } } func (r *runtimeCollector) collectCPUMetrics() { r.metrics.goMaxProcs.Update(float64(runtime.GOMAXPROCS(0))) r.metrics.numCPUs.Update(float64(runtime.NumCPU())) r.metrics.numGoRoutines.Update(float64(runtime.NumGoroutine())) } func (r *runtimeCollector) collectMemMetrics(memStats *runtime.MemStats) { r.metrics.heapAlloc.Update(float64(memStats.HeapAlloc)) r.metrics.heapIdle.Update(float64(memStats.HeapIdle)) r.metrics.heapInuse.Update(float64(memStats.HeapInuse)) r.metrics.stackInuse.Update(float64(memStats.StackInuse)) } func (r *runtimeCollector) collectGCMetrics(memStats *runtime.MemStats) { num := memStats.NumGC lastNum := r.lastNumGC r.lastNumGC = num if delta := num - lastNum; delta > 0 { r.metrics.numGC.Inc(int64(delta)) if delta >= _numGCThreshold { /* coverage ignore next line */ lastNum = num - _numGCThreshold } for i := lastNum; i != num; i++ { pause := memStats.PauseNs[i%uint32(len(memStats.PauseNs))] r.metrics.gcPauseMs.Record(time.Duration(pause)) r.metrics.gcPauseMsHist.RecordDuration(time.Duration(pause)) } } }