utils/prometheusmetrics/prometheusmetrics.go (218 lines of code) (raw):

// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may // not use this file except in compliance with the License. A copy of the // License is located at // // http://aws.amazon.com/apache2.0/ // // or in the "license" file accompanying this file. This file is distributed // on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either // express or implied. See the License for the specific language governing // permissions and limitations under the License. package prometheusmetrics import ( "net/http" "strconv" "sync" "time" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/retry" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) var log = logger.Get() var ( IpamdErr = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_ipamd_error_count", Help: "The number of errors encountered in ipamd", }, []string{"fn"}, ) IpamdActionsInprogress = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "awscni_ipamd_action_inprogress", Help: "The number of ipamd actions in progress", }, []string{"fn"}, ) EnisMax = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_eni_max", Help: "The maximum number of ENIs that can be attached to the instance, accounting for unmanaged ENIs", }, ) IpMax = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_ip_max", Help: "The maximum number of IP addresses that can be allocated to the instance", }, ) ReconcileCnt = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_reconcile_count", Help: "The number of times ipamd reconciles on ENIs and IP/Prefix addresses", }, []string{"fn"}, ) AddIPCnt = prometheus.NewCounter( prometheus.CounterOpts{ Name: "awscni_add_ip_req_count", Help: "The number of add IP address requests", }, ) DelIPCnt = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_del_ip_req_count", Help: "The number of delete IP address requests", }, []string{"reason"}, ) PodENIErr = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_pod_eni_error_count", Help: "The number of errors encountered for pod ENIs", }, []string{"fn"}, ) AwsAPILatency = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Name: "awscni_aws_api_latency_ms", Help: "AWS API call latency in ms", }, []string{"api", "error", "status"}, ) AwsAPIErr = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_aws_api_error_count", Help: "The number of times AWS API returns an error", }, []string{"api", "error"}, ) AwsUtilsErr = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_aws_utils_error_count", Help: "The number of errors not handled in awsutils library", }, []string{"fn", "error"}, ) Ec2ApiReq = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_ec2api_req_count", Help: "The number of requests made to EC2 APIs by CNI", }, []string{"fn"}, ) Ec2ApiErr = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "awscni_ec2api_error_count", Help: "The number of failed EC2 APIs requests", }, []string{"fn"}, ) Enis = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_eni_allocated", Help: "The number of ENIs allocated", }, ) TotalIPs = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_total_ip_addresses", Help: "The total number of IP addresses", }, ) AssignedIPs = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_assigned_ip_addresses", Help: "The number of IP addresses assigned to pods", }, ) ForceRemovedENIs = prometheus.NewCounter( prometheus.CounterOpts{ Name: "awscni_force_removed_enis", Help: "The number of ENIs force removed while they had assigned pods", }, ) ForceRemovedIPs = prometheus.NewCounter( prometheus.CounterOpts{ Name: "awscni_force_removed_ips", Help: "The number of IPs force removed while they had assigned pods", }, ) TotalPrefixes = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "awscni_total_ipv4_prefixes", Help: "The total number of IPv4 prefixes", }, ) IpsPerCidr = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "awscni_assigned_ip_per_cidr", Help: "The total number of IP addresses assigned per cidr", }, []string{"cidr"}, ) NoAvailableIPAddrs = prometheus.NewCounter( prometheus.CounterOpts{ Name: "awscni_no_available_ip_addresses", Help: "The number of pod IP assignments that fail due to no available IP addresses", }, ) EniIPsInUse = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "awscni_assigned_ip_per_eni", Help: "The number of allocated ips partitioned by eni", }, []string{"eni"}, ) ) // ServeMetrics sets up ipamd metrics and introspection endpoints func ServeMetrics(metricsPort int) { log.Infof("Serving metrics on port %d", metricsPort) server := SetupMetricsServer(metricsPort) for { once := sync.Once{} _ = retry.WithBackoff(retry.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error { err := server.ListenAndServe() once.Do(func() { log.Warnf("Error running http API: %v", err) }) return err }) } } func SetupMetricsServer(metricsPort int) *http.Server { serveMux := http.NewServeMux() serveMux.Handle("/metrics", promhttp.Handler()) server := &http.Server{ Addr: ":" + strconv.Itoa(metricsPort), Handler: serveMux, ReadTimeout: 5 * time.Second, WriteTimeout: 5 * time.Second, } return server } func PrometheusRegister() { prometheus.MustRegister(IpamdErr) prometheus.MustRegister(IpamdActionsInprogress) prometheus.MustRegister(EnisMax) prometheus.MustRegister(IpMax) prometheus.MustRegister(ReconcileCnt) prometheus.MustRegister(AddIPCnt) prometheus.MustRegister(DelIPCnt) prometheus.MustRegister(PodENIErr) prometheus.MustRegister(AwsAPILatency) prometheus.MustRegister(AwsAPIErr) prometheus.MustRegister(AwsUtilsErr) prometheus.MustRegister(Ec2ApiReq) prometheus.MustRegister(Ec2ApiErr) prometheus.MustRegister(Enis) prometheus.MustRegister(TotalIPs) prometheus.MustRegister(AssignedIPs) prometheus.MustRegister(ForceRemovedENIs) prometheus.MustRegister(ForceRemovedIPs) prometheus.MustRegister(TotalPrefixes) prometheus.MustRegister(IpsPerCidr) prometheus.MustRegister(NoAvailableIPAddrs) prometheus.MustRegister(EniIPsInUse) } // This can be enhanced to get it programatically. // Initial CNI metrics helper enhancement includes only Gauge. Doesn't support GaugeVec, Counter, CounterVec and Summary func GetSupportedPrometheusCNIMetricsMapping() map[string]prometheus.Collector { prometheusCNIMetrics := map[string]prometheus.Collector{ "awscni_eni_max": EnisMax, "awscni_ip_max": IpMax, "awscni_eni_allocated": Enis, "awscni_total_ip_addresses": TotalIPs, "awscni_assigned_ip_addresses": AssignedIPs, "awscni_total_ipv4_prefixes": TotalPrefixes, } return prometheusCNIMetrics }