in pkg/operator/collection.go [367:516]
// makeCollectorConfig assembles the Prometheus configuration for the managed
// collectors from the collection spec, the export specs, and all
// PodMonitoring, ClusterPodMonitoring, and ClusterNodeMonitoring resources in
// the cluster.
//
// For every monitoring resource a ConfigurationCreateSuccess condition is
// computed; resources whose condition changed are returned as status updates
// so the caller can persist them in one batch. A resource whose scrape config
// cannot be generated is logged and omitted from the generated config, but
// does not fail the overall reconciliation.
func (r *collectionReconciler) makeCollectorConfig(ctx context.Context, spec *monitoringv1.CollectionSpec, exports []monitoringv1.ExportSpec) (*prometheusConfig, []update, error) {
	logger, _ := logr.FromContext(ctx)
	cfg := &promconfig.Config{
		GlobalConfig: promconfig.GlobalConfig{
			ExternalLabels: labels.FromMap(spec.ExternalLabels),
		},
	}
	var err error
	cfg.ScrapeConfigs, err = spec.ScrapeConfigs()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create kubelet scrape config: %w", err)
	}
	cfg.RemoteWriteConfigs, err = makeRemoteWriteConfig(exports)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create export config: %w", err)
	}
	// Generate a separate scrape job for every endpoint in every PodMonitoring.
	var (
		podMons         monitoringv1.PodMonitoringList
		clusterPodMons  monitoringv1.ClusterPodMonitoringList
		clusterNodeMons monitoringv1.ClusterNodeMonitoringList
	)
	if err := r.client.List(ctx, &podMons); err != nil {
		return nil, nil, fmt.Errorf("failed to list PodMonitorings: %w", err)
	}
	usedSecrets := monitoringv1.PrometheusSecretConfigs{}
	projectID, location, cluster := resolveLabels(r.opts.ProjectID, r.opts.Location, r.opts.Cluster, spec.ExternalLabels)
	var updates []update
	// Mark status updates in batch with a single timestamp: take it once here
	// so every condition written in this reconciliation shares the same
	// transition time.
	now := metav1.Now()
	for i := range podMons.Items {
		// Index into the list rather than ranging by value so the pointer
		// stored in updates refers to the actual list element and never
		// aliases a per-iteration copy (pre-Go-1.22 loop-variable semantics).
		pmon := &podMons.Items[i]
		cond := &monitoringv1.MonitoringCondition{
			Type:   monitoringv1.ConfigurationCreateSuccess,
			Status: corev1.ConditionTrue,
		}
		cfgs, err := pmon.ScrapeConfigs(projectID, location, cluster, usedSecrets)
		if err != nil {
			msg := "generating scrape config failed for PodMonitoring endpoint"
			cond = &monitoringv1.MonitoringCondition{
				Type:    monitoringv1.ConfigurationCreateSuccess,
				Status:  corev1.ConditionFalse,
				Reason:  "ScrapeConfigError",
				Message: msg,
			}
			logger.Error(err, msg, "namespace", pmon.Namespace, "name", pmon.Name)
		} else {
			cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, cfgs...)
		}
		if pmon.Status.SetMonitoringCondition(pmon.GetGeneration(), now, cond) {
			updates = append(updates, update{
				object: pmon,
				status: true,
			})
		}
	}
	if err := r.client.List(ctx, &clusterPodMons); err != nil {
		return nil, nil, fmt.Errorf("failed to list ClusterPodMonitorings: %w", err)
	}
	for i := range clusterPodMons.Items {
		// See the PodMonitoring loop above for why we index instead of
		// ranging by value.
		cmon := &clusterPodMons.Items[i]
		cond := &monitoringv1.MonitoringCondition{
			Type:   monitoringv1.ConfigurationCreateSuccess,
			Status: corev1.ConditionTrue,
		}
		cfgs, err := cmon.ScrapeConfigs(projectID, location, cluster, usedSecrets)
		if err != nil {
			msg := "generating scrape config failed for ClusterPodMonitoring endpoint"
			cond = &monitoringv1.MonitoringCondition{
				Type:    monitoringv1.ConfigurationCreateSuccess,
				Status:  corev1.ConditionFalse,
				Reason:  "ScrapeConfigError",
				Message: msg,
			}
			logger.Error(err, msg, "namespace", cmon.Namespace, "name", cmon.Name)
		} else {
			cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, cfgs...)
		}
		if cmon.Status.SetMonitoringCondition(cmon.GetGeneration(), now, cond) {
			updates = append(updates, update{
				object: cmon,
				status: true,
			})
		}
	}
	// TODO(bwplotka): Warn about missing RBAC policies.
	// https://github.com/GoogleCloudPlatform/prometheus-engine/issues/789
	secretConfigs := usedSecrets.SecretConfigs()
	if err := r.client.List(ctx, &clusterNodeMons); err != nil {
		return nil, nil, fmt.Errorf("failed to list ClusterNodeMonitorings: %w", err)
	}
	// The following job names are reserved by GMP for ClusterNodeMonitoring in the
	// gmp-system namespace. They will not be generated if kubeletScraping is enabled.
	const (
		reservedCAdvisorJobName = "gmp-kubelet-cadvisor"
		reservedKubeletJobName  = "gmp-kubelet-metrics"
	)
	for i := range clusterNodeMons.Items {
		cnmon := &clusterNodeMons.Items[i]
		if spec.KubeletScraping != nil && (cnmon.Name == reservedKubeletJobName || cnmon.Name == reservedCAdvisorJobName) {
			// NOTE: logr messages are not printf-formatted; the resource name
			// is attached as a structured key/value instead.
			logger.Info("ClusterNodeMonitoring job was not applied because OperatorConfig.collector.kubeletScraping is enabled; kubeletScraping already includes the metrics in this job", "name", cnmon.Name)
			continue
		}
		cond := &monitoringv1.MonitoringCondition{
			Type:   monitoringv1.ConfigurationCreateSuccess,
			Status: corev1.ConditionTrue,
		}
		cfgs, err := cnmon.ScrapeConfigs(projectID, location, cluster)
		if err != nil {
			msg := "generating scrape config failed for ClusterNodeMonitoring endpoint"
			cond = &monitoringv1.MonitoringCondition{
				Type:    monitoringv1.ConfigurationCreateSuccess,
				Status:  corev1.ConditionFalse,
				Reason:  "ScrapeConfigError",
				Message: msg,
			}
			logger.Error(err, msg, "namespace", cnmon.Namespace, "name", cnmon.Name)
		} else {
			cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, cfgs...)
		}
		if cnmon.Status.SetMonitoringCondition(cnmon.GetGeneration(), now, cond) {
			updates = append(updates, update{
				object: cnmon,
				status: true,
			})
		}
	}
	// Sort to ensure reproducible configs.
	sort.Slice(cfg.ScrapeConfigs, func(i, j int) bool {
		return cfg.ScrapeConfigs[i].JobName < cfg.ScrapeConfigs[j].JobName
	})
	return &prometheusConfig{
		Config:        *cfg,
		SecretConfigs: secretConfigs,
	}, updates, nil
}