pkg/exporter/probe/rdma/rdma.go (102 lines of code) (raw):

package rdma import ( "context" "fmt" "strconv" "strings" "github.com/alibaba/kubeskoop/pkg/exporter/nettop" "github.com/prometheus/client_golang/prometheus" "github.com/samber/lo" log "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" "github.com/alibaba/kubeskoop/pkg/exporter/probe" ) const ( probeName = "rdma" linkTypeUnknown = "unknown" ) var ( resourceSummaryEntries = []string{"cm_id", "cq", "ctx", "mr", "pd", "qp"} rdmaDevLabels = []string{"device", "type"} rdmaDevPortLabels = append(rdmaDevLabels, "port") ) func init() { probe.MustRegisterMetricsProbe(probeName, metricsProbeCreator) } func metricsProbeCreator() (probe.MetricsProbe, error) { p := &metricsProbe{} opts := probe.BatchMetricsOpts{ Namespace: probe.MetricsNamespace, Subsystem: probeName, VariableLabels: probe.StandardMetricsLabels, SingleMetricsOpts: lo.Map(resourceSummaryEntries, func(entry string, _ int) probe.SingleMetricsOpts { return probe.SingleMetricsOpts{Name: entry, VariableLabels: rdmaDevLabels, Help: fmt.Sprintf("rdma resource summary %s", entry), ValueType: prometheus.GaugeValue} }), } opts.SingleMetricsOpts = append(opts.SingleMetricsOpts, mlx5Metrics...) opts.SingleMetricsOpts = append(opts.SingleMetricsOpts, erdmaMetrics...) batchMetrics := probe.NewBatchMetrics(opts, p.collectOnce) return probe.NewMetricsProbe(probeName, p, batchMetrics), nil } type metricsProbe struct { } func (p *metricsProbe) Start(_ context.Context) error { return nil } func (p *metricsProbe) Stop(_ context.Context) error { return nil } func (p *metricsProbe) collectOnce(emit probe.Emit) error { // rdma only collect host network entity, err := nettop.GetHostNetworkEntity() if err != nil { return err } rdmaRes, err := netlink.RdmaResourceList() if err != nil { return err } if len(rdmaRes) == 0 { return nil } standardLabelValues := probe.BuildStandardMetricsLabelValues(entity) for _, res := range rdmaRes { link, err := netlink.RdmaLinkByName(res.Name) if err != nil { log.Errorf("failed get rdma link %v, error: %v", res.Name, err) continue } linkType := rdmaLinkType(link) deviceLabelValues := append(standardLabelValues, res.Name, linkType) for resKey, resVal := range res.RdmaResourceSummaryEntries { emit(resKey, deviceLabelValues, float64(resVal)) } if linkType == "unknown" { continue } linkStatistics, err := netlink.RdmaStatistic(link) if err != nil { log.Errorf("failed get rdma statistics %v, error: %v", res.Name, err) continue } for _, port := range linkStatistics.RdmaPortStatistics { devicePortLabelValues := append(deviceLabelValues, strconv.FormatUint(uint64(port.PortIndex), 10)) for statKey, statVal := range port.Statistics { emit(strings.Join([]string{linkType, statKey}, "_"), devicePortLabelValues, float64(statVal)) } } } return nil } func rdmaLinkType(link *netlink.RdmaLink) string { if link == nil { return linkTypeUnknown } switch strings.Split(link.Attrs.Name, "_")[0] { case linkTypeMellanox: return linkTypeMellanox case linkTypeERdma: return linkTypeERdma default: return linkTypeUnknown } }