pkg/operator/apis/monitoring/v1/pod_config.go
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v1
import (
"errors"
"fmt"
"maps"
"slices"
"strings"
"github.com/GoogleCloudPlatform/prometheus-engine/pkg/export"
"github.com/prometheus/common/config"
prommodel "github.com/prometheus/common/model"
promconfig "github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
discoverykube "github.com/prometheus/prometheus/discovery/kubernetes"
"github.com/prometheus/prometheus/model/relabel"
)
const (
labelCluster = "cluster"
labelLocation = "location"
labelProjectID = "project_id"
labelContainer = "container"
labelNamespace = "namespace"
labelNode = "node"
labelPod = "pod"
labelTopLevelControllerName = "top_level_controller_name"
labelTopLevelControllerType = "top_level_controller_type"
)
var (
allowedClusterPodMonitoringLabel = map[string]bool{
labelContainer: true,
labelNamespace: true,
labelNode: true,
labelPod: true,
labelTopLevelControllerName: true,
labelTopLevelControllerType: true,
}
allowedClusterPodMonitoringLabels = slices.Sorted(maps.Keys(allowedClusterPodMonitoringLabel))
allowedPodMonitoringLabel = map[string]bool{
labelContainer: true,
labelNode: true,
labelPod: true,
labelTopLevelControllerName: true,
labelTopLevelControllerType: true,
}
allowedPodMonitoringLabels = slices.Sorted(maps.Keys(allowedPodMonitoringLabel))
topLevelControllerNameRules = []*relabel.Config{
// First, capture the controller name from the pod manifest.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_name"},
TargetLabel: labelTopLevelControllerName,
},
// If the controller kind is a ReplicaSet and it has a pod template hash, it belongs to a deployment.
// The name of the deployment is the name of the ReplicaSet with the hash truncated.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind", "__meta_kubernetes_pod_labelpresent_pod_template_hash", "__meta_kubernetes_pod_controller_name"},
Regex: relabel.MustNewRegexp("ReplicaSet;true;(.+)-[a-z0-9]+"),
TargetLabel: labelTopLevelControllerName,
},
// If the controller kind is Job and it has an 8-digit numeric suffix (i.e. a timestamp), assume the Job was created by a CronJob.
// The name of the CronJob is the name of the Job with the timestamp truncated.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind", "__meta_kubernetes_pod_controller_name"},
Regex: relabel.MustNewRegexp("Job;(.+)-\\d{8}$"),
TargetLabel: labelTopLevelControllerName,
},
}
topLevelControllerTypeRules = []*relabel.Config{
// First, capture the controller kind from the pod manifest.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind"},
TargetLabel: labelTopLevelControllerType,
},
// If the controller kind is a ReplicaSet and it has a pod template hash, it belongs to a deployment.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind", "__meta_kubernetes_pod_labelpresent_pod_template_hash", "__meta_kubernetes_pod_controller_name"},
Regex: relabel.MustNewRegexp("ReplicaSet;true;(.+)-[a-z0-9]+"),
TargetLabel: labelTopLevelControllerType,
Replacement: "Deployment",
},
// If the controller kind is Job and it has an 8-digit numeric suffix (i.e. a timestamp), assume the Job was created by a CronJob.
{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind", "__meta_kubernetes_pod_controller_name"},
Regex: relabel.MustNewRegexp("Job;(.+)-\\d{8}$"),
TargetLabel: labelTopLevelControllerType,
Replacement: "CronJob",
},
}
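// Taken together, an illustrative example (hypothetical names): a pod owned by
// ReplicaSet "web-7d4b9c" that carries a pod-template-hash label resolves to
// top_level_controller_name="web" and top_level_controller_type="Deployment", and a pod
// owned by Job "backup-29381742" resolves to name "backup" and type "CronJob". This
// assumes Prometheus's default replacement "$1" applies where Replacement is left unset.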
)
// ScrapeConfigs generates Prometheus scrape configs for the PodMonitoring.
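//
// An illustrative call (hypothetical variable and argument values):
//
//	cfgs, err := pm.ScrapeConfigs("example-project", "us-central1-a", "example-cluster", pool)
//	if err != nil {
//		// One of the endpoint definitions is invalid.
//	}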
func (p *PodMonitoring) ScrapeConfigs(projectID, location, cluster string, pool PrometheusSecretConfigs) (res []*promconfig.ScrapeConfig, err error) {
relabelCfgs := []*relabel.Config{
// Force target labels, so they cannot be overwritten by metric labels.
{
Action: relabel.Replace,
TargetLabel: labelProjectID,
Replacement: projectID,
},
{
Action: relabel.Replace,
TargetLabel: labelLocation,
Replacement: location,
},
{
Action: relabel.Replace,
TargetLabel: labelCluster,
Replacement: cluster,
},
}
return p.scrapeConfigs(relabelCfgs, pool)
}
// scrapeConfigs generates a Prometheus scrape config for each endpoint of the PodMonitoring.
func (p *PodMonitoring) scrapeConfigs(relabelCfgs []*relabel.Config, pool PrometheusSecretConfigs) (res []*promconfig.ScrapeConfig, err error) {
relabelCfgs = append(relabelCfgs, &relabel.Config{
// Filter targets by namespace of the PodMonitoring configuration.
Action: relabel.Keep,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_namespace"},
Regex: relabel.MustNewRegexp(p.Namespace),
})
for i := range p.Spec.Endpoints {
// Each scrape endpoint has its own relabel config so make sure we copy the array.
c, err := p.endpointScrapeConfig(i, append([]*relabel.Config(nil), relabelCfgs...), pool)
if err != nil {
return nil, fmt.Errorf("invalid definition for endpoint with index %d: %w", i, err)
}
res = append(res, c)
}
return res, validateDistinctJobNames(res)
}
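// endpointScrapeConfig builds the scrape config for the endpoint at the given index,
// adding the PodMonitoring-specific namespace, job, metadata, and pod-phase relabelings
// before delegating to the shared endpointScrapeConfig helper.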
func (p *PodMonitoring) endpointScrapeConfig(index int, relabelCfgs []*relabel.Config, pool PrometheusSecretConfigs) (*promconfig.ScrapeConfig, error) {
// Filter targets that belong to selected pods.
selectors, err := relabelingsForSelector(p.Spec.Selector, p)
if err != nil {
return nil, err
}
relabelCfgs = append(relabelCfgs, selectors...)
metadataLabels := make(map[string]bool)
// The metadata list must always be set in general, but we allow the null case
// for backwards compatibility and won't add any labels in that case.
if p.Spec.TargetLabels.Metadata != nil {
for _, l := range *p.Spec.TargetLabels.Metadata {
if !allowedPodMonitoringLabel[l] {
return nil, fmt.Errorf("metadata label %q not allowed, must be one of %v", l, allowedPodMonitoringLabels)
}
metadataLabels[l] = true
}
}
relabelCfgs = append(relabelCfgs, relabelingsForMetadata(metadataLabels)...)
// The namespace label is always set for PodMonitorings.
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_namespace"},
TargetLabel: labelNamespace,
})
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
Replacement: p.Name,
TargetLabel: "job",
})
// Drop any non-running pods if filterRunning is left unspecified or explicitly enabled.
if p.Spec.FilterRunning == nil || *p.Spec.FilterRunning {
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Drop,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_phase"},
Regex: relabel.MustNewRegexp("(Failed|Succeeded)"),
})
}
return endpointScrapeConfig(
p,
p.Spec.Endpoints[index],
relabelCfgs,
p.Spec.TargetLabels.FromPod,
p.Spec.Limits,
pool,
)
}
// ScrapeConfigs generates Prometheus scrape configs for the ClusterPodMonitoring.
func (c *ClusterPodMonitoring) ScrapeConfigs(projectID, location, cluster string, pool PrometheusSecretConfigs) (res []*promconfig.ScrapeConfig, err error) {
relabelCfgs := []*relabel.Config{
// Force target labels, so they cannot be overwritten by metric labels.
{
Action: relabel.Replace,
TargetLabel: labelProjectID,
Replacement: projectID,
},
{
Action: relabel.Replace,
TargetLabel: labelLocation,
Replacement: location,
},
{
Action: relabel.Replace,
TargetLabel: labelCluster,
Replacement: cluster,
},
}
return c.scrapeConfigs(relabelCfgs, pool)
}
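// scrapeConfigs generates a Prometheus scrape config for each endpoint of the
// ClusterPodMonitoring and validates that the resulting job names are distinct.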
func (c *ClusterPodMonitoring) scrapeConfigs(relabelCfgs []*relabel.Config, pool PrometheusSecretConfigs) (res []*promconfig.ScrapeConfig, err error) {
for i := range c.Spec.Endpoints {
// Each scrape endpoint has its own relabel config so make sure we copy the array.
c, err := c.endpointScrapeConfig(i, append([]*relabel.Config(nil), relabelCfgs...), pool)
if err != nil {
return nil, fmt.Errorf("invalid definition for endpoint with index %d: %w", i, err)
}
res = append(res, c)
}
return res, validateDistinctJobNames(res)
}
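// endpointScrapeConfig builds the scrape config for the endpoint at the given index,
// adding the ClusterPodMonitoring-specific job, metadata, and pod-phase relabelings
// before delegating to the shared endpointScrapeConfig helper.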
func (c *ClusterPodMonitoring) endpointScrapeConfig(index int, relabelCfgs []*relabel.Config, pool PrometheusSecretConfigs) (*promconfig.ScrapeConfig, error) {
// Filter targets that belong to selected pods.
selectors, err := relabelingsForSelector(c.Spec.Selector, c)
if err != nil {
return nil, err
}
relabelCfgs = append(relabelCfgs, selectors...)
metadataLabels := make(map[string]bool)
// The metadata list must always be set in general, but we allow the null case
// for backwards compatibility. In that case we must always add the namespace label.
if c.Spec.TargetLabels.Metadata == nil {
metadataLabels = map[string]bool{
labelNamespace: true,
}
} else {
for _, l := range *c.Spec.TargetLabels.Metadata {
if !allowedClusterPodMonitoringLabel[l] {
return nil, fmt.Errorf("metadata label %q not allowed, must be one of %v", l, allowedClusterPodMonitoringLabels)
}
metadataLabels[l] = true
}
}
relabelCfgs = append(relabelCfgs, relabelingsForMetadata(metadataLabels)...)
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
Replacement: c.Name,
TargetLabel: "job",
})
// Drop any non-running pods if filterRunning is left unspecified or explicitly enabled.
if c.Spec.FilterRunning == nil || *c.Spec.FilterRunning {
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Drop,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_phase"},
Regex: relabel.MustNewRegexp("(Failed|Succeeded)"),
})
}
return endpointScrapeConfig(
c,
c.Spec.Endpoints[index],
relabelCfgs,
c.Spec.TargetLabels.FromPod,
c.Spec.Limits,
pool,
)
}
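// endpointScrapeConfig assembles the scrape config for a single endpoint of a
// PodMonitoring or ClusterPodMonitoring: pod discovery restricted to the collector's
// node, the accumulated relabeling rules, pod label mappings, scrape limits, and the
// endpoint's HTTP client settings.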
func endpointScrapeConfig(
m PodMonitoringCRD,
ep ScrapeEndpoint,
relabelCfgs []*relabel.Config,
podLabels []LabelMapping,
limits *ScrapeLimits,
pool PrometheusSecretConfigs,
) (*promconfig.ScrapeConfig, error) {
id := m.GetKey()
// Configure how Prometheus talks to the Kubernetes API server to discover targets.
// This configuration is the same for all scrape jobs (esp. selectors).
// This ensures that Prometheus can reuse the underlying client and caches, which reduces
// load on the Kubernetes API server.
discoveryCfgs := discovery.Configs{
&discoverykube.SDConfig{
HTTPClientConfig: config.DefaultHTTPClientConfig,
Role: discoverykube.RolePod,
// Drop all potential targets that are not on the same node as the collector. The $(NODE_NAME) variable
// is interpolated by the config reloader sidecar before the config reaches the Prometheus collector.
// Doing it through selectors rather than relabeling should substantially reduce the client and
// server side load.
Selectors: []discoverykube.SelectorConfig{
{
Role: discoverykube.RolePod,
Field: fmt.Sprintf("spec.nodeName=$(%s)", EnvVarNodeName),
},
},
},
}
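// The discovery config above corresponds roughly to the following Prometheus
// kubernetes_sd_config (a sketch, with default HTTP client settings omitted):
//
//	kubernetes_sd_configs:
//	- role: pod
//	  selectors:
//	  - role: pod
//	    field: spec.nodeName=$(NODE_NAME)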
relabelCfgs = append(relabelCfgs,
// Use the pod name as the primary identifier in the instance label, unless the pod
// is controlled by a DaemonSet, in which case the node name is used.
// This provides a better user experience on dashboards which template on the instance label
// and expect it to have meaningful value, such as common node exporter dashboards.
//
// Save the value in a temporary label and use it further down.
&relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_name"},
TargetLabel: "__tmp_instance",
},
&relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_controller_kind", "__meta_kubernetes_pod_node_name"},
Regex: relabel.MustNewRegexp(`DaemonSet;(.*)`),
TargetLabel: "__tmp_instance",
Replacement: "$1",
},
)
// Filter targets by the configured port.
if ep.Port.StrVal != "" {
portValue, err := relabel.NewRegexp(ep.Port.StrVal)
if err != nil {
return nil, fmt.Errorf("invalid port name %q: %w", ep.Port, err)
}
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Keep,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_container_port_name"},
Regex: portValue,
})
// The instance label being the pod name would be ideal UX-wise. But we cannot be certain
// that multiple metrics endpoints on a pod don't expose metrics with the same name. Thus
// we have to disambiguate along the port as well.
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__tmp_instance", "__meta_kubernetes_pod_container_port_name"},
Regex: relabel.MustNewRegexp("(.+);(.+)"),
Replacement: "$1:$2",
TargetLabel: "instance",
})
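// For example (illustrative names), a target from pod "web-0" exposing a container
// port named "metrics" ends up with instance="web-0:metrics".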
} else if ep.Port.IntVal != 0 {
// Prometheus generates a target candidate for each declared port in a pod.
// If a container in a pod has no declared port, a single target candidate is generated for
// that container.
//
// If a numeric port is specified for scraping but not declared in the pod, we still
// want to allow scraping it. For that we must ensure that we produce a single final output
// target for that numeric port. The only way to achieve this is to produce identical
// output targets for all incoming target candidates of that pod.
// This requires leaving the container label empty (or at a singleton value) even if it is
// requested as an output label via .targetLabels.metadata. This aligns with the Pod specification,
// which requires port names in a Pod to be unique but not port numbers. Thus, the container is
// potentially ambiguous for numerical ports in any case.
// First, drop the container label even if it was added before.
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.LabelDrop,
Regex: relabel.MustNewRegexp(labelContainer),
})
// Then, rewrite the instance and __address__ for each candidate to the same values.
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__tmp_instance"},
Replacement: fmt.Sprintf("$1:%d", ep.Port.IntVal),
TargetLabel: "instance",
})
relabelCfgs = append(relabelCfgs, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_ip"},
Replacement: fmt.Sprintf("$1:%d", ep.Port.IntVal),
TargetLabel: "__address__",
})
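// For example (illustrative values), with port 9090 a candidate from pod "web-0" with
// pod IP 10.4.2.7 ends up with instance="web-0:9090" and __address__="10.4.2.7:9090".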
} else {
return nil, errors.New("port must be set")
}
// Add pod labels.
pCfgs, err := labelMappingRelabelConfigs(podLabels, "__meta_kubernetes_pod_label_")
if err != nil {
return nil, fmt.Errorf("invalid pod label mapping: %w", err)
}
relabelCfgs = append(relabelCfgs, pCfgs...)
httpCfg, err := ep.ToPrometheusConfig(m, pool)
if err != nil {
return nil, fmt.Errorf("unable to parse or invalid Prometheus HTTP client config: %w", err)
}
if err := httpCfg.Validate(); err != nil {
return nil, fmt.Errorf("invalid Prometheus HTTP client config: %w", err)
}
return buildPrometheusScrapeConfig(fmt.Sprintf("%s/%s", id, &ep.Port), discoveryCfgs, httpCfg, relabelCfgs, limits, ep)
}
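// relabelingsForMetadata returns the relabeling rules that populate the requested
// metadata target labels from Kubernetes pod discovery metadata.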
func relabelingsForMetadata(keys map[string]bool) (res []*relabel.Config) {
if keys[labelNamespace] {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_namespace"},
TargetLabel: labelNamespace,
})
}
if keys[labelPod] {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_name"},
TargetLabel: labelPod,
})
}
if keys[labelContainer] {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_container_name"},
TargetLabel: labelContainer,
})
}
if keys[labelNode] {
res = append(res, &relabel.Config{
Action: relabel.Replace,
SourceLabels: prommodel.LabelNames{"__meta_kubernetes_pod_node_name"},
TargetLabel: labelNode,
})
}
if keys[labelTopLevelControllerName] {
res = append(res, topLevelControllerNameRules...)
}
if keys[labelTopLevelControllerType] {
res = append(res, topLevelControllerTypeRules...)
}
return res
}
// convertRelabelingRule converts the rule to a relabel configuration. An error is returned
// if the rule would modify one of the protected labels.
func convertRelabelingRule(r RelabelingRule) (*relabel.Config, error) {
rcfg := &relabel.Config{
// Upstream applies ToLower when digesting the config, so we allow the same.
Action: relabel.Action(strings.ToLower(r.Action)),
TargetLabel: r.TargetLabel,
Separator: r.Separator,
Replacement: r.Replacement,
Modulus: r.Modulus,
}
for _, n := range r.SourceLabels {
rcfg.SourceLabels = append(rcfg.SourceLabels, prommodel.LabelName(n))
}
// Instantiate the default regex Prometheus uses so that the checks below can be run
// if no explicit value is provided.
re := relabel.MustNewRegexp(`(.*)`)
// We must only set the regex if it's not empty. As in other cases, the Prometheus code does
// not set up the structs correctly and this would default to the string "null" when marshalled,
// which is then interpreted as a regex again when read by Prometheus.
if r.Regex != "" {
var err error
re, err = relabel.NewRegexp(r.Regex)
if err != nil {
return nil, fmt.Errorf("invalid regex %q: %w", r.Regex, err)
}
rcfg.Regex = re
}
// Validate that the protected target labels are not mutated by the provided relabeling rules.
switch rcfg.Action {
// Default action is "replace" per https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config.
case relabel.Replace, relabel.HashMod, "":
// These actions write into the target label and it must not be a protected one.
if protectedLabel[r.TargetLabel] {
return nil, fmt.Errorf("cannot relabel with action %q onto protected label %q", r.Action, r.TargetLabel)
}
case relabel.LabelDrop:
if matchesAnyProtectedLabel(re) {
return nil, fmt.Errorf("regex %s would drop at least one of the protected labels %v", r.Regex, protectedLabels)
}
case relabel.LabelKeep:
// LabelKeep drops all labels that don't match the regex, so every protected label
// must match it.
if !matchesAllProtectedLabels(re) {
return nil, fmt.Errorf("regex %s would drop at least one of the protected labels %s", r.Regex, protectedLabels)
}
case relabel.LabelMap:
// It is difficult to prove for certain that labelmap does not override a protected label.
// Thus we just prohibit its use for now.
// The most feasible way to support this would probably be to store all protected labels
// in __tmp_protected_<name> via a replace rule, then apply labelmap, then replace the
// __tmp label back onto the protected label.
return nil, fmt.Errorf("relabeling with action %q not allowed", r.Action)
case relabel.Keep, relabel.Drop:
// These actions don't modify a series and are OK.
default:
return nil, fmt.Errorf("unknown relabeling action %q", r.Action)
}
return rcfg, nil
}
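// protectedLabel lists the target labels that user-provided relabeling rules must not
// overwrite or drop; convertRelabelingRule enforces this.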
var (
protectedLabel = map[string]bool{
export.KeyProjectID: true,
export.KeyLocation: true,
export.KeyCluster: true,
export.KeyNamespace: true,
export.KeyJob: true,
export.KeyInstance: true,
"__address__": true,
}
protectedLabels = slices.Sorted(maps.Keys(protectedLabel))
)
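// matchesAnyProtectedLabel reports whether the regex matches at least one protected label.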
func matchesAnyProtectedLabel(re relabel.Regexp) bool {
for pl := range protectedLabel {
if re.MatchString(pl) {
return true
}
}
return false
}
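// matchesAllProtectedLabels reports whether the regex matches every protected label.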
func matchesAllProtectedLabels(re relabel.Regexp) bool {
for pl := range protectedLabel {
if !re.MatchString(pl) {
return false
}
}
return true
}
// labelMappingRelabelConfigs generates relabel configs using a provided mapping and resource prefix.
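// For example (illustrative mapping), {From: "app", To: "application"} with prefix
// "__meta_kubernetes_pod_label_" yields a replace rule that copies
// __meta_kubernetes_pod_label_app into the target label "application".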
func labelMappingRelabelConfigs(mappings []LabelMapping, prefix string) ([]*relabel.Config, error) {
var relabelCfgs []*relabel.Config
for _, m := range mappings {
// `To` may be unset; default to `From`.
if m.To == "" {
m.To = m.From
}
rcfg, err := convertRelabelingRule(RelabelingRule{
Action: "replace",
SourceLabels: []string{prefix + string(sanitizeLabelName(m.From))},
TargetLabel: m.To,
})
if err != nil {
return nil, err
}
relabelCfgs = append(relabelCfgs, rcfg)
}
return relabelCfgs, nil
}