pkg/exporter/probe/rdma/erdma.go (77 lines of code) (raw):
package rdma
import (
"strings"
"github.com/alibaba/kubeskoop/pkg/exporter/probe"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
)
// linkTypeERdma is the "erdma" link-type string. Within this file it is used
// as the leading component of every ERDMA metric name.
const linkTypeERdma = "erdma"
var (
	// erdmaStatisticCounterEntries maps each ERDMA statistic counter name to
	// the help text published with the metric built from it. Entries are
	// grouped by counter family and kept in alphabetical order within a group
	// so that success/failure pairs sit next to each other.
	erdmaStatisticCounterEntries = map[string]string{
		// Connection accept counters.
		"accept_failed_cnt":  "The total number of failed connection accept attempts.",
		"accept_success_cnt": "The total number of successful connection accept attempts.",
		"accept_total_cnt":   "The total number of connection accept attempts, successful or not.",

		// Command queue counters.
		"cmdq_comp_cnt":      "The total number of command queue completions processed.",
		"cmdq_cq_armed_cnt":  "The total number of command queue completion events that have been armed.",
		"cmdq_eq_event_cnt":  "The total number of command queue event queue events received.",
		"cmdq_eq_notify_cnt": "The total number of command queue event queue notifications triggered.",
		"cmdq_submitted_cnt": "The total number of command queue submissions.",

		// Connection establishment counters.
		"connect_failed_cnt":  "The total number of failed connection attempts.",
		"connect_reset_cnt":   "The total number of connection attempts that have been reset.",
		"connect_success_cnt": "The total number of successful connection attempts.",
		"connect_timeout_cnt": "The total number of connection attempts that timed out.",
		"connect_total_cnt":   "The total number of connection attempts, successful or not.",

		// Asynchronous event queue counters.
		"erdma_aeq_event_cnt":  "The total number of ERDMA asynchronous event queue events received.",
		"erdma_aeq_notify_cnt": "The total number of ERDMA asynchronous event queue notifications triggered.",

		// Hardware traffic and drop counters.
		"hw_bps_limit_drop_cnt":    "The total number of packets dropped due to hardware bandwidth limit.",
		"hw_disable_drop_cnt":      "The total number of packets dropped due to hardware being disabled.",
		"hw_pps_limit_drop_cnt":    "The total number of packets dropped due to hardware packets-per-second limit.",
		"hw_rx_bps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive bandwidth limit.",
		"hw_rx_bytes_cnt":          "The total number of bytes received by the hardware.",
		"hw_rx_disable_drop_cnt":   "The total number of received packets dropped due to receive hardware being disabled.",
		"hw_rx_packets_cnt":        "The total number of packets received by the hardware.",
		"hw_rx_pps_limit_drop_cnt": "The total number of received packets dropped due to hardware receive packets-per-second limit.",
		"hw_tx_bytes_cnt":          "The total number of bytes transmitted by the hardware.",
		"hw_tx_packets_cnt":        "The total number of packets transmitted by the hardware.",
		"hw_tx_reqs_cnt":           "The total number of transmit requests processed by the hardware.",

		// Listen socket counters.
		"listen_create_cnt":  "The total number of successfully created listen sockets.",
		"listen_destroy_cnt": "The total number of destroyed listen sockets.",
		"listen_failed_cnt":  "The total number of failed attempts to create listen sockets.",
		"listen_ipv6_cnt":    "The total number of listen sockets created for IPv6 addresses.",
		"listen_success_cnt": "The total number of successful listen operations.",

		// Connection reject counters.
		"reject_cnt":        "The total number of received connection requests that were rejected.",
		"reject_failed_cnt": "The total number of failed attempts to reject connection requests.",

		// Verbs API counters.
		"verbs_alloc_mr_cnt":          "The total number of successful memory region allocations using verbs API.",
		"verbs_alloc_mr_failed_cnt":   "The total number of failed memory region allocation attempts using verbs API.",
		"verbs_alloc_pd_cnt":          "The total number of successful protection domain allocations using verbs API.",
		"verbs_alloc_pd_failed_cnt":   "The total number of failed protection domain allocation attempts using verbs API.",
		"verbs_alloc_uctx_cnt":        "The total number of successful user context allocations using verbs API.",
		"verbs_alloc_uctx_failed_cnt": "The total number of failed user context allocation attempts using verbs API.",
		"verbs_create_cq_cnt":         "The total number of successful completion queue creations using verbs API.",
		"verbs_create_cq_failed_cnt":  "The total number of failed completion queue creation attempts using verbs API.",
		"verbs_create_qp_cnt":         "The total number of successful queue pair creations using verbs API.",
		"verbs_create_qp_failed_cnt":  "The total number of failed queue pair creation attempts using verbs API.",
		"verbs_dealloc_pd_cnt":        "The total number of deallocated protection domains using verbs API.",
		"verbs_dealloc_uctx_cnt":      "The total number of deallocated user contexts using verbs API.",
		"verbs_dereg_mr_cnt":          "The total number of successful memory region deregistrations using verbs API.",
		"verbs_dereg_mr_failed_cnt":   "The total number of failed memory region deregistration attempts using verbs API.",
		"verbs_destroy_cq_cnt":        "The total number of destroyed completion queues using verbs API.",
		"verbs_destroy_cq_failed_cnt": "The total number of failed completion queue deletion using verbs API.",
		// Counts destroyed queue pairs; failures are reported separately by
		// verbs_destroy_qp_failed_cnt.
		"verbs_destroy_qp_cnt":        "The total number of destroyed queue pairs using verbs API.",
		"verbs_destroy_qp_failed_cnt": "The total number of failed attempts to destroy queue pairs (QPs) using verbs API.",
		"verbs_get_dma_mr_cnt":        "The total number of successful direct memory access (DMA) memory region acquisitions using verbs API.",
		"verbs_get_dma_mr_failed_cnt": "The total number of failed attempts to acquire direct memory access (DMA) memory regions using verbs API.",
		"verbs_reg_usr_mr_cnt":        "The total number of user memory regions successfully registered with the verbs API.",
		"verbs_reg_usr_mr_failed_cnt": "The total number of failed attempts to register user memory regions with the verbs API.",
	}

	// erdmaMetrics holds one counter-typed metric definition per entry of
	// erdmaStatisticCounterEntries. Each metric is named "erdma_<counter>",
	// uses the counter's help text, and is labelled with rdmaDevPortLabels.
	// The slice is built from the map's keys, so its element order is not
	// fixed between runs.
	erdmaMetrics = lo.Map(lo.Keys(erdmaStatisticCounterEntries), func(k string, _ int) probe.SingleMetricsOpts {
		return probe.SingleMetricsOpts{
			Name:           strings.Join([]string{linkTypeERdma, k}, "_"),
			VariableLabels: rdmaDevPortLabels,
			Help:           erdmaStatisticCounterEntries[k],
			ValueType:      prometheus.CounterValue,
		}
	})
)