internal/plugin/manager/pluginmetrics.go (57 lines of code) (raw):
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package manager
import (
"context"
"fmt"
"time"
"github.com/GoogleCloudPlatform/galog"
acpb "github.com/GoogleCloudPlatform/google-guest-agent/internal/acp/proto/google_guest_agent/acp"
"github.com/GoogleCloudPlatform/google-guest-agent/internal/ps"
tpb "google.golang.org/protobuf/types/known/timestamppb"
)
// Metric is a single sample of a plugin's resource usage: its memory and CPU
// usage together with the timestamp at which the sample was recorded.
type Metric struct {
// timestamp is the time when the metric is recorded.
timestamp *tpb.Timestamp
// memoryUsage is the memory usage of the plugin at the timestamp.
// NOTE(review): the unit is not visible in this file (presumably bytes) —
// confirm against ps.Memory.
memoryUsage int64
// cpuUsage is the CPU usage of the plugin at the timestamp.
// NOTE(review): the scale is not visible in this file (presumably a
// percentage) — confirm against ps.CPUUsage.
cpuUsage float32
}
// PluginMetrics is a periodic job that monitors one plugin and stores its
// metrics. It exposes ID, Interval, ShouldEnable and Run.
// NOTE(review): presumably these methods satisfy the scheduler's job
// interface — confirm against the scheduler package.
type PluginMetrics struct {
// plugin is the plugin to be monitored.
plugin *Plugin
// interval is the interval of getting the plugin's metrics.
interval time.Duration
}
// NewPluginMetrics returns a PluginMetrics job bound to the given plugin that
// reports interval as its collection period. Neither argument is validated.
func NewPluginMetrics(plugin *Plugin, interval time.Duration) *PluginMetrics {
	pm := new(PluginMetrics)
	pm.plugin, pm.interval = plugin, interval
	return pm
}
// ID returns the identifier of this metrics job: the monitored plugin's full
// name followed by the "-metrics" suffix.
func (p *PluginMetrics) ID() string {
	const idFormat = "%s-metrics"
	return fmt.Sprintf(idFormat, p.plugin.FullName())
}
// Interval returns the period at which the plugin's metrics are collected.
// The second return value is always true.
// NOTE(review): the meaning of the boolean is defined by the scheduler's job
// contract, which is not visible in this file — confirm there.
func (p *PluginMetrics) Interval() (time.Duration, bool) {
	every := p.interval
	return every, true
}
// ShouldEnable reports whether the scheduler should schedule this job. Plugin
// metric collection is unconditionally enabled, so the result is always true
// and ctx is not consulted.
func (*PluginMetrics) ShouldEnable(ctx context.Context) bool {
	const alwaysEnabled = true
	return alwaysEnabled
}
// Run takes one sample of the plugin's CPU and memory usage and appends it to
// the plugin's cached metrics.
//
// Return values:
//   - (true, error) when the plugin is not in the RUNNING state; nothing is
//     sampled for this invocation.
//   - (false, error) when the plugin reports pid 0, which signals that metric
//     collection should stop because the plugin is being stopped or removed.
//   - (true, nil) once a sample has been cached.
//
// A failure to read CPU or memory usage is only logged as a warning; the
// sample is still cached with whatever value the failing call returned.
func (p *PluginMetrics) Run(ctx context.Context) (bool, error) {
	target := p.plugin

	// Without a running process there is nothing to sample; reading [/proc],
	// for example on Linux, would fail anyway.
	if state := target.State(); state != acpb.CurrentPluginStates_DaemonPluginState_RUNNING {
		return true, fmt.Errorf("plugin %q found in state %v, skipping metric collection", target.FullName(), state)
	}

	// A zero pid can be observed when a previously scheduled collection races
	// with the plugin being stopped or removed; give up on collection then.
	pid := target.pid()
	if pid == 0 {
		return false, fmt.Errorf("plugin %q is being stopped/removed, got pid 0, skipping metric collection", target.FullName())
	}

	// Both readings are best effort: warn on failure and keep going.
	cpu, cpuErr := ps.CPUUsage(ctx, pid)
	if cpuErr != nil {
		galog.Warnf("Failed to get CPU usage for plugin %s: %v", target.FullName(), cpuErr)
	}
	mem, memErr := ps.Memory(pid)
	if memErr != nil {
		galog.Warnf("Failed to get memory usage for plugin %s: %v", target.FullName(), memErr)
	}

	// The metrics cache is guarded by metricsMu; hold it until we return.
	target.RuntimeInfo.metricsMu.Lock()
	defer target.RuntimeInfo.metricsMu.Unlock()

	sample := Metric{
		timestamp:   tpb.Now(),
		memoryUsage: int64(mem),
		cpuUsage:    float32(cpu),
	}
	target.RuntimeInfo.metrics.Add(sample)

	return true, nil
}