pkg/operator/operator_config.go (625 lines of code) (raw):

// Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package operator import ( "context" "fmt" "path" "strings" monitoringv1 "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator/apis/monitoring/v1" "github.com/go-logr/logr" alertmanagerconfig "github.com/prometheus/alertmanager/config" promcommonconfig "github.com/prometheus/common/config" prommodel "github.com/prometheus/common/model" promconfig "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" discoverykube "github.com/prometheus/prometheus/discovery/kubernetes" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" yaml "gopkg.in/yaml.v3" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // Base resource names which may be used for multiple different resource kinds // related to the given component. const ( NameOperatorConfig = "config" NameRuleEvaluator = "rule-evaluator" NameCollector = "collector" NameAlertmanager = "alertmanager" ) // Secret paths. const ( RulesSecretName = "rules" CollectionSecretName = "collection" AlertmanagerSecretName = "alertmanager" AlertmanagerPublicSecretName = "alertmanager" AlertmanagerPublicSecretKey = "alertmanager.yaml" rulesDir = "/etc/rules" secretsDir = "/etc/secrets" AlertmanagerConfigKey = "config.yaml" ) // Collector Kubernetes Deployment extraction/detection. const ( CollectorPrometheusContainerName = "prometheus" CollectorPrometheusContainerPortName = "prom-metrics" CollectorConfigReloaderContainerPortName = "cfg-rel-metrics" RuleEvaluatorContainerName = "evaluator" AlertmanagerContainerName = "alertmanager" ) var AlertmanagerNoOpConfig = ` receivers: - name: "noop" route: receiver: "noop" ` func rulesLabels() map[string]string { return map[string]string{ LabelAppName: NameRuleEvaluator, KubernetesAppName: RuleEvaluatorAppName, } } func alertmanagerLabels() map[string]string { return map[string]string{ LabelAppName: NameAlertmanager, KubernetesAppName: AlertmanagerAppName, } } func componentAnnotations() map[string]string { return map[string]string{ AnnotationMetricName: componentName, // Allow cluster autoscaler to evict evaluator Pods even though the Pods // have an emptyDir volume mounted. This is okay since the node where the // Pod runs will be scaled down. ClusterAutoscalerSafeEvictionLabel: "true", } } // setupOperatorConfigControllers ensures a rule-evaluator // deployment as part of managed collection. func setupOperatorConfigControllers(op *Operator) error { // The singleton OperatorConfig is the request object we reconcile against. objRequest := reconcile.Request{ NamespacedName: types.NamespacedName{ Namespace: op.opts.PublicNamespace, Name: NameOperatorConfig, }, } // Default OperatorConfig filter. objFilterOperatorConfig := namespacedNamePredicate{ namespace: op.opts.PublicNamespace, name: NameOperatorConfig, } // Rule-evaluator deployment filter. objFilterRuleEvaluator := namespacedNamePredicate{ namespace: op.opts.OperatorNamespace, name: NameRuleEvaluator, } // Rule-evaluator secret filter. objFilterRuleEvaluatorSecret := namespacedNamePredicate{ namespace: op.opts.OperatorNamespace, name: RulesSecretName, } // Rule-evaluator secret filter. objFilterAlertManagerSecret := namespacedNamePredicate{ namespace: op.opts.OperatorNamespace, name: AlertmanagerSecretName, } // Reconcile operator-managed resources. err := ctrl.NewControllerManagedBy(op.manager). Named("operator-config"). // Filter events without changes for all watches. WithEventFilter(predicate.ResourceVersionChangedPredicate{}). For( &monitoringv1.OperatorConfig{}, builder.WithPredicates(objFilterOperatorConfig), ). Watches( &appsv1.Deployment{}, enqueueConst(objRequest), builder.WithPredicates( objFilterRuleEvaluator, predicate.GenerationChangedPredicate{}, )). Watches( &corev1.Secret{}, enqueueConst(objRequest), builder.WithPredicates(predicate.NewPredicateFuncs(secretFilter(op.opts.PublicNamespace))), ). // Detect and undo changes to the secret. Watches( &corev1.Secret{}, enqueueConst(objRequest), builder.WithPredicates(objFilterRuleEvaluatorSecret)). Watches( &corev1.Secret{}, enqueueConst(objRequest), builder.WithPredicates(objFilterAlertManagerSecret)). Complete(newOperatorConfigReconciler(op.manager.GetClient(), op.opts)) if err != nil { return fmt.Errorf("operator-config controller: %w", err) } return nil } // secretFilter filters by non-default Secrets in specified namespace. func secretFilter(ns string) func(object client.Object) bool { return func(object client.Object) bool { if object.GetNamespace() == ns { return !strings.HasPrefix(object.GetName(), "default-token") } return false } } // operatorConfigReconciler reconciles the OperatorConfig CRD. type operatorConfigReconciler struct { client client.Client opts Options } // newOperatorConfigReconciler creates a new operatorConfigReconciler. func newOperatorConfigReconciler(c client.Client, opts Options) *operatorConfigReconciler { return &operatorConfigReconciler{ client: c, opts: opts, } } // Reconcile ensures the OperatorConfig resource is reconciled. func (r *operatorConfigReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { logger, _ := logr.FromContext(ctx) logger.WithValues("operatorconfig", req.NamespacedName).Info("reconciling operatorconfig") config, err := r.ensureOperatorConfig(ctx, logger, req) if err != nil { return reconcile.Result{}, err } // Ensure the rule-evaluator config and grab any to-be-mirrored // secret data on the way. secretData, err := r.ensureRuleEvaluatorConfig(ctx, &config.Rules) if err != nil { return reconcile.Result{}, fmt.Errorf("ensure rule-evaluator config: %w", err) } // Ensure the alertmanager configuration is pulled from the spec. if err := r.ensureAlertmanagerConfigSecret(ctx, config.ManagedAlertmanager); err != nil { return reconcile.Result{}, fmt.Errorf("ensure alertmanager config secret: %w", err) } if err := r.ensureAlertmanagerStatefulSet(ctx, config.ManagedAlertmanager); err != nil { return reconcile.Result{}, fmt.Errorf("ensure alertmanager statefulset: %w", err) } // Mirror the fetched secret data to where the rule-evaluator can // mount and access. if err := r.ensureRuleEvaluatorSecrets(ctx, secretData); err != nil { return reconcile.Result{}, fmt.Errorf("ensure rule-evaluator secrets: %w", err) } // Ensure the rule-evaluator deployment and volume mounts. if err := r.ensureRuleEvaluatorDeployment(ctx); err != nil { return reconcile.Result{}, fmt.Errorf("ensure rule-evaluator deploy: %w", err) } return reconcile.Result{}, nil } // ensureOperatorConfig returns either the defaulted user-defined OperatorConfig, or if it // does not exist in the cluster, a default OperatorConfig. If the user-defined // OperatorConfig is missing default values, it is updated in the cluster. func (r *operatorConfigReconciler) ensureOperatorConfig(ctx context.Context, logger logr.Logger, req reconcile.Request) (*monitoringv1.OperatorConfig, error) { exists := true config := &monitoringv1.OperatorConfig{ ObjectMeta: metav1.ObjectMeta{ Namespace: req.Namespace, Name: req.Name, }, } if err := r.client.Get(ctx, req.NamespacedName, config); apierrors.IsNotFound(err) { logger.Info("no operatorconfig created yet") exists = false } else if err != nil { return nil, fmt.Errorf("get operatorconfig for incoming: %q: %w", req.String(), err) } defaulter := &operatorConfigDefaulter{ projectID: r.opts.ProjectID, location: r.opts.Location, cluster: r.opts.Cluster, } wasUpdated := defaulter.update(config) if exists && wasUpdated { if err := r.client.Update(ctx, config); err != nil { return nil, fmt.Errorf("default operatorconfig: %w", err) } } return config, nil } // ensureRuleEvaluatorConfig reconciles the config for rule-evaluator. func (r *operatorConfigReconciler) ensureRuleEvaluatorConfig(ctx context.Context, spec *monitoringv1.RuleEvaluatorSpec) (map[string][]byte, error) { cfg, secretData, err := r.makeRuleEvaluatorConfig(ctx, spec) if err != nil { return nil, fmt.Errorf("make rule-evaluator configmap: %w", err) } // Upsert rule-evaluator config. if err := r.client.Update(ctx, cfg); apierrors.IsNotFound(err) { if err := r.client.Create(ctx, cfg); err != nil { return nil, fmt.Errorf("create rule-evaluator config: %w", err) } } else if err != nil { return nil, fmt.Errorf("update rule-evaluator config: %w", err) } return secretData, nil } type RuleEvaluatorConfig struct { promconfig.Config `yaml:",inline"` // Google Cloud configuration. Matches our fork's configuration. GoogleCloud GoogleCloudConfig `yaml:"google_cloud,omitempty"` } func (config *RuleEvaluatorConfig) UnmarshalYAML(value *yaml.Node) error { // See: https://github.com/go-yaml/yaml/issues/125 // Since the Prometheus configuration uses a custom unmarshaler, it is unable to be // unmarshal-ed unless we write our own. if err := value.Decode(&config.Config); err != nil { return err } // We must replicate the nested fields. googleCloudConfig := struct { GoogleCloud GoogleCloudConfig `yaml:"google_cloud,omitempty"` }{} if err := value.Decode(&googleCloudConfig); err != nil { return err } config.GoogleCloud = googleCloudConfig.GoogleCloud return nil } type GoogleCloudQueryConfig struct { ProjectID string `yaml:"project_id,omitempty"` GeneratorURL string `yaml:"generator_url,omitempty"` CredentialsFile string `yaml:"credentials,omitempty"` } // makeRuleEvaluatorConfig creates the config for rule-evaluator. // This is stored as a Secret rather than a ConfigMap as it could contain // sensitive configuration information. func (r *operatorConfigReconciler) makeRuleEvaluatorConfig(ctx context.Context, spec *monitoringv1.RuleEvaluatorSpec) (*corev1.ConfigMap, map[string][]byte, error) { amConfigs, secretData, err := r.makeAlertmanagerConfigs(ctx, &spec.Alerting) if err != nil { return nil, nil, fmt.Errorf("make alertmanager config: %w", err) } if spec.Credentials != nil { p := pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: spec.Credentials}) b, err := getSecretKeyBytes(ctx, r.client, r.opts.PublicNamespace, spec.Credentials) if err != nil { return nil, nil, fmt.Errorf("get service account credentials: %w", err) } secretData[p] = b } // If no explicit project ID is set, use the one provided to the operator. // On GKE the rule-evaluator can also auto-detect the cluster's project // but this won't work in other Kubernetes environments. queryProjectID, _, _ := resolveLabels(r.opts.ProjectID, r.opts.Location, r.opts.Cluster, spec.ExternalLabels) if spec.QueryProjectID != "" { queryProjectID = spec.QueryProjectID } cfg := RuleEvaluatorConfig{ Config: promconfig.Config{ GlobalConfig: promconfig.GlobalConfig{ ExternalLabels: labels.FromMap(spec.ExternalLabels), }, AlertingConfig: promconfig.AlertingConfig{ AlertmanagerConfigs: amConfigs, }, RuleFiles: []string{path.Join(rulesDir, "*.yaml")}, }, GoogleCloud: GoogleCloudConfig{ Query: &GoogleCloudQueryConfig{ ProjectID: queryProjectID, GeneratorURL: spec.GeneratorURL, }, }, } if spec.Credentials != nil { credentialsFile := path.Join(secretsDir, pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: spec.Credentials})) cfg.GoogleCloud.Query.CredentialsFile = credentialsFile cfg.GoogleCloud.Export = &GoogleCloudExportConfig{ CredentialsFile: ptr.To(credentialsFile), } } cfgEncoded, err := yaml.Marshal(cfg) if err != nil { return nil, nil, fmt.Errorf("marshal Prometheus config: %w", err) } // Create rule-evaluator Secret. cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: NameRuleEvaluator, Namespace: r.opts.OperatorNamespace, }, Data: map[string]string{ configFilename: string(cfgEncoded), }, } return cm, secretData, nil } // ensureRuleEvaluatorSecrets reconciles the Secrets for rule-evaluator. func (r *operatorConfigReconciler) ensureRuleEvaluatorSecrets(ctx context.Context, data map[string][]byte) error { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: RulesSecretName, Namespace: r.opts.OperatorNamespace, Annotations: componentAnnotations(), Labels: rulesLabels(), }, Data: make(map[string][]byte), } for f, b := range data { secret.Data[f] = b } if err := r.client.Update(ctx, secret); apierrors.IsNotFound(err) { if err := r.client.Create(ctx, secret); err != nil { return fmt.Errorf("create rule-evaluator secrets: %w", err) } } else if err != nil { return fmt.Errorf("update rule-evaluator secrets: %w", err) } return nil } // ensureAlertmanagerConfigSecret copies the managed Alertmanager config secret from gmp-public. func (r *operatorConfigReconciler) ensureAlertmanagerConfigSecret(ctx context.Context, spec *monitoringv1.ManagedAlertmanagerSpec) error { logger, _ := logr.FromContext(ctx) pubNamespace := r.opts.PublicNamespace // This is the default, no-op secret config. If we find a user-defined config, // we will overwrite the default data with the user's data. // If we don't find a user config, we will still proceed with ensuring this // default secret exists (so that the alertmanager pod doesn't crash due to no // config found). This flow also handles user deletion/disabling of managed AM. secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: AlertmanagerSecretName, Namespace: r.opts.OperatorNamespace, Annotations: componentAnnotations(), Labels: alertmanagerLabels(), }, Data: map[string][]byte{AlertmanagerConfigKey: []byte(AlertmanagerNoOpConfig)}, } // Set defaults on public namespace secret. var sel = &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: AlertmanagerPublicSecretName, }, Key: AlertmanagerPublicSecretKey, } // Overwrite defaults if specified. if spec != nil && spec.ConfigSecret != nil { sel.Name = spec.ConfigSecret.Name sel.Key = spec.ConfigSecret.Key } // Try and read the secret for use. b, err := getSecretKeyBytes(ctx, r.client, pubNamespace, sel) if err != nil { if !apierrors.IsNotFound(err) { return err } // If the config secret is not found, it may have been manually deleted // (ie, to disable managed AM), so we will continue with restoring the no-op config // so that the managed AM pod doesn't crash loop. logger.Info(fmt.Sprintf("alertmanager config secret not found in namespace %s: %s", pubNamespace, err.Error())) } else { config := alertmanagerConfig{} if err := yaml.Unmarshal(b, &config); err != nil { return fmt.Errorf("load alertmanager config: %w", err) } // Only set the value if we need to. This provides a fail-safe in case users change our // Alertmanager image with their own. Otherwise, if we always set and they change the image, // their Alertmanager will fail unless they have our patch. if config.GoogleCloud.ExternalURL != spec.ExternalURL { config.GoogleCloud.ExternalURL = spec.ExternalURL b, err = yaml.Marshal(config) if err != nil { return fmt.Errorf("marshal alertmanager config: %w", err) } } secret.Data[AlertmanagerConfigKey] = b } if err := r.client.Update(ctx, secret); apierrors.IsNotFound(err) { if err := r.client.Create(ctx, secret); err != nil { return fmt.Errorf("create alertmanager config secret: %w", err) } } else if err != nil { return fmt.Errorf("update alertmanager config secret: %w", err) } return nil } type alertmanagerConfig struct { alertmanagerconfig.Config `yaml:",inline"` // Google Cloud configuration. Matches our fork's configuration. GoogleCloud googleCloudAlertmanagerConfig `yaml:"google_cloud,omitempty"` } type googleCloudAlertmanagerConfig struct { ExternalURL string `yaml:"external_url,omitempty"` } func (config *alertmanagerConfig) UnmarshalYAML(value *yaml.Node) error { // See: https://github.com/go-yaml/yaml/issues/125 // Since the Prometheus configuration uses a custom unmarshaler, it is unable to be // unmarshal-ed unless we write our own. if err := value.Decode(&config.Config); err != nil { return err } // We must replicate the nested fields. googleCloudConfig := struct { GoogleCloud googleCloudAlertmanagerConfig `yaml:"google_cloud,omitempty"` }{} if err := value.Decode(&googleCloudConfig); err != nil { return err } config.GoogleCloud = googleCloudConfig.GoogleCloud return nil } // ensureAlertmanagerStatefulSet configures the managed Alertmanager instance // to reflect the provided spec. func (r *operatorConfigReconciler) ensureAlertmanagerStatefulSet(ctx context.Context, spec *monitoringv1.ManagedAlertmanagerSpec) error { if spec == nil { return nil } logger, _ := logr.FromContext(ctx) var sset appsv1.StatefulSet err := r.client.Get(ctx, client.ObjectKey{Namespace: r.opts.OperatorNamespace, Name: NameAlertmanager}, &sset) // Some users deliberately not want to run the alertmanager. // Only emit a warning but don't cause retries // as this logic gets re-triggered anyway if the StatefulSet is created later. if apierrors.IsNotFound(err) { logger.Error(err, "Alertmanager StatefulSet does not exist") return nil } return err } // ensureRuleEvaluatorDeployment reconciles the Deployment for rule-evaluator. func (r *operatorConfigReconciler) ensureRuleEvaluatorDeployment(ctx context.Context) error { logger, _ := logr.FromContext(ctx) var deploy appsv1.Deployment err := r.client.Get(ctx, client.ObjectKey{Namespace: r.opts.OperatorNamespace, Name: NameRuleEvaluator}, &deploy) // Some users deliberately not want to run the rule-evaluator. Only emit a warning but don't cause // retries as this logic gets re-triggered anyway if the Deployment is created later. if apierrors.IsNotFound(err) { logger.Error(err, "rule-evaluator Deployment does not exist") return nil } return err } // makeAlertmanagerConfigs creates the alertmanager_config entries as described in // https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config. func (r *operatorConfigReconciler) makeAlertmanagerConfigs(ctx context.Context, spec *monitoringv1.AlertingSpec) (promconfig.AlertmanagerConfigs, map[string][]byte, error) { var ( err error configs promconfig.AlertmanagerConfigs secretData = make(map[string][]byte) ) amNamespacedName := types.NamespacedName{ Namespace: r.opts.OperatorNamespace, Name: NameAlertmanager, } // If the default Alertmanager exists, append it to the list of spec.Alertmanagers. var amSvc corev1.Service if resourceErr := r.client.Get(ctx, amNamespacedName, &amSvc); resourceErr == nil { // Alertmanager service should have one port defined, otherwise ignore. if ports := amSvc.Spec.Ports; len(ports) > 0 { // Assume first port on service is the correct endpoint. port := ports[0].Port svcDNSName := fmt.Sprintf("%s.%s:%d", amSvc.Name, amSvc.Namespace, port) cfg := promconfig.DefaultAlertmanagerConfig cfg.ServiceDiscoveryConfigs = discovery.Configs{ discovery.StaticConfig{ &targetgroup.Group{ Targets: []prommodel.LabelSet{{prommodel.AddressLabel: prommodel.LabelValue(svcDNSName)}}, }, }, } configs = append(configs, &cfg) } } for _, am := range spec.Alertmanagers { // The upstream struct is lacking the omitempty field on the API version. Thus it looks // like we explicitly set it to empty (invalid) even if left empty after marshalling. // Thus we initialize the config with defaulting. Similar applies for the embedded HTTPConfig. cfg := promconfig.DefaultAlertmanagerConfig if am.PathPrefix != "" { cfg.PathPrefix = am.PathPrefix } if am.Scheme != "" { cfg.Scheme = am.Scheme } if am.APIVersion != "" { cfg.APIVersion = promconfig.AlertmanagerAPIVersion(am.APIVersion) } // Timeout, APIVersion, PathPrefix, and Scheme all resort to defaults if left unspecified. if am.Timeout != "" { cfg.Timeout, err = prommodel.ParseDuration(am.Timeout) if err != nil { return nil, nil, fmt.Errorf("invalid timeout: %w", err) } } // Authorization. if am.Authorization != nil { cfg.HTTPClientConfig.Authorization = &promcommonconfig.Authorization{ Type: am.Authorization.Type, } if c := am.Authorization.Credentials; c != nil { b, err := getSecretKeyBytes(ctx, r.client, r.opts.PublicNamespace, c) if err != nil { return nil, nil, err } p := pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: c}) secretData[p] = b cfg.HTTPClientConfig.Authorization.CredentialsFile = path.Join(secretsDir, p) } } // TLS config. if am.TLS != nil { minVersion, err := monitoringv1.TLSVersionFromString(am.TLS.MinVersion) if err != nil { return nil, nil, fmt.Errorf("unable to convert TLS min version: %w", err) } maxVersion, err := monitoringv1.TLSVersionFromString(am.TLS.MaxVersion) if err != nil { return nil, nil, fmt.Errorf("unable to convert TLS min version: %w", err) } tlsCfg := promcommonconfig.TLSConfig{ InsecureSkipVerify: am.TLS.InsecureSkipVerify, ServerName: am.TLS.ServerName, MinVersion: minVersion, MaxVersion: maxVersion, } if am.TLS.CA != nil { p := pathForSelector(r.opts.PublicNamespace, am.TLS.CA) b, err := getSecretOrConfigMapBytes(ctx, r.client, r.opts.PublicNamespace, am.TLS.CA) if err != nil { return nil, nil, err } secretData[p] = b tlsCfg.CAFile = path.Join(secretsDir, p) } if am.TLS.Cert != nil { p := pathForSelector(r.opts.PublicNamespace, am.TLS.Cert) b, err := getSecretOrConfigMapBytes(ctx, r.client, r.opts.PublicNamespace, am.TLS.Cert) if err != nil { return nil, nil, err } secretData[p] = b tlsCfg.CertFile = path.Join(secretsDir, p) } if am.TLS.KeySecret != nil { p := pathForSelector(r.opts.PublicNamespace, &monitoringv1.SecretOrConfigMap{Secret: am.TLS.KeySecret}) b, err := getSecretKeyBytes(ctx, r.client, r.opts.PublicNamespace, am.TLS.KeySecret) if err != nil { return nil, nil, err } secretData[p] = b tlsCfg.KeyFile = path.Join(secretsDir, p) } cfg.HTTPClientConfig.TLSConfig = tlsCfg } // Configure discovery of AM endpoints via Kubernetes API. cfg.ServiceDiscoveryConfigs = discovery.Configs{ &discoverykube.SDConfig{ // Must instantiate a default client config explicitly as the follow_redirects // field lacks the omitempty tag. Thus it looks like we explicitly set it to false // even if left empty after marshalling. HTTPClientConfig: promcommonconfig.DefaultHTTPClientConfig, Role: discoverykube.RoleEndpoint, NamespaceDiscovery: discoverykube.NamespaceDiscovery{ Names: []string{am.Namespace}, }, }, } svcNameRE, err := relabel.NewRegexp(am.Name) if err != nil { return nil, nil, fmt.Errorf("cannot build regex from service name %q: %w", am.Name, err) } cfg.RelabelConfigs = append(cfg.RelabelConfigs, &relabel.Config{ Action: relabel.Keep, SourceLabels: prommodel.LabelNames{"__meta_kubernetes_endpoints_name"}, Regex: svcNameRE, }) if am.Port.StrVal != "" { re, err := relabel.NewRegexp(am.Port.String()) if err != nil { return nil, nil, fmt.Errorf("cannot build regex from port %q: %w", am.Port, err) } cfg.RelabelConfigs = append(cfg.RelabelConfigs, &relabel.Config{ Action: relabel.Keep, SourceLabels: prommodel.LabelNames{"__meta_kubernetes_endpoint_port_name"}, Regex: re, }) } else if am.Port.IntVal != 0 { // The endpoints object does not provide a meta label for the port number. If the endpoint // is backed by a pod we can inspect the pod port number label, but to make it work in general // we simply override the port in the address label. // If the endpoints has multiple ports, this will create duplicate targets but they will be // deduplicated by the discovery engine. re, err := relabel.NewRegexp(`(.+):\d+`) if err != nil { return nil, nil, fmt.Errorf("building address regex failed: %w", err) } cfg.RelabelConfigs = append(cfg.RelabelConfigs, &relabel.Config{ Action: relabel.Replace, SourceLabels: prommodel.LabelNames{"__address__"}, Regex: re, TargetLabel: "__address__", Replacement: fmt.Sprintf("$1:%d", am.Port.IntVal), }) } // TODO(pintohutch): add support for basic_auth, oauth2, proxy_url, follow_redirects. // Append to alertmanagers config array. configs = append(configs, &cfg) } return configs, secretData, nil } // getSecretOrConfigMapBytes is a helper function to conditionally fetch // the secret or configmap selector payloads. func getSecretOrConfigMapBytes(ctx context.Context, c client.Reader, namespace string, scm *monitoringv1.SecretOrConfigMap) ([]byte, error) { var ( b []byte err error ) if secret := scm.Secret; secret != nil { b, err = getSecretKeyBytes(ctx, c, namespace, secret) if err != nil { return b, err } } else if cm := scm.ConfigMap; cm != nil { b, err = getConfigMapKeyBytes(ctx, c, namespace, cm) if err != nil { return b, err } } return b, nil } // getSecretKeyBytes processes the given NamespacedSecretKeySelector and returns the referenced data. func getSecretKeyBytes(ctx context.Context, c client.Reader, namespace string, sel *corev1.SecretKeySelector) ([]byte, error) { var ( secret = &corev1.Secret{} nn = types.NamespacedName{ Namespace: namespace, Name: sel.Name, } ) err := c.Get(ctx, nn, secret) if err != nil { return nil, fmt.Errorf("get secret: %w", err) } if b, ok := secret.Data[sel.Key]; ok { return b, nil } else if s, ok := secret.StringData[sel.Key]; ok { return []byte(s), nil } return nil, fmt.Errorf("key %q in secret %q not found", sel.Key, sel.Name) } // getConfigMapKeyBytes processes the given NamespacedConfigMapKeySelector and returns the referenced data. func getConfigMapKeyBytes(ctx context.Context, c client.Reader, namespace string, sel *corev1.ConfigMapKeySelector) ([]byte, error) { var ( cm = &corev1.ConfigMap{} nn = types.NamespacedName{ Namespace: namespace, Name: sel.Name, } ) err := c.Get(ctx, nn, cm) if err != nil { return nil, fmt.Errorf("get configmap: %w", err) } if s, ok := cm.Data[sel.Key]; ok { return []byte(s), nil } else if b, ok := cm.BinaryData[sel.Key]; ok { return b, nil } return nil, fmt.Errorf("key %q in configmap %q not found", sel.Key, sel.Name) } // pathForSelector cretes the filepath for the provided NamespacedSecretOrConfigMap. // This can be used to avoid naming collisions of like-keys across K8s resources. func pathForSelector(namespace string, scm *monitoringv1.SecretOrConfigMap) string { if scm == nil { return "" } if scm.ConfigMap != nil { return fmt.Sprintf("%s_%s_%s_%s", "configmap", namespace, scm.ConfigMap.Name, scm.ConfigMap.Key) } if scm.Secret != nil { return fmt.Sprintf("%s_%s_%s_%s", "secret", namespace, scm.Secret.Name, scm.Secret.Key) } return "" }