prombench/manifests/cluster-infra/3b_prometheus-meta.yaml (258 lines of code) (raw):

# Persistent volume backing the prometheus-meta TSDB (mounted at /data by the
# prometheus-meta Deployment in this file).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-meta
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      # If you change this make sure to update the prometheus meta disk
      # retention settings (the retention size flag in the Deployment args).
      storage: 1000Gi
---
# Alerting rules loaded by prometheus-meta from /etc/prometheus/alerts.
# This file is rendered by a Go-style template pass before it is parsed as
# YAML; the escaped-brace expressions below are meant to survive that pass as
# literal Prometheus template delimiters.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alert-rules
data:
  prombench.rules.yml: |
    groups:
      - name: gke-related
        rules:
          # Fires once a prombench-<number> namespace is at least 3 whole
          # days old, i.e. a benchmark run has been left running.
          - alert: benchmarkTestsRunning
            expr: floor((time() - kube_namespace_created{namespace=~"prombench-[0-9]+"})/(60*60*24)) >= 3
            labels:
              severity: info
              prNum: '{{"{{"}} $labels.prNum {{"}}"}}'
              # Quoted so that an empty or boolean/number-looking expansion
              # still yields a string label value.
              org: '{{ .GITHUB_ORG }}'
              repo: '{{ .GITHUB_REPO }}'
            annotations:
              description: >
                Benchmark tests are running for {{"{{"}} $value {{"}}"}} days!
                If this is intended ignore this message otherwise you can
                cancel it by commenting: `/prombench cancel`
---
# Prometheus server configuration for prometheus-meta.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-meta
data:
  prometheus.yaml: |
    global:
      scrape_interval: 30s

    rule_files:
      - /etc/prometheus/alerts/*.yml

    alerting:
      alertmanagers:
        - kubernetes_sd_configs:
            - role: pod
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          relabel_configs:
            # Only pods in the default namespace labelled app=alertmanager.
            - source_labels: [__meta_kubernetes_namespace]
              regex: default
              action: keep
            - source_labels: [__meta_kubernetes_pod_label_app]
              regex: alertmanager
              action: keep
            # Alertmanager is addressed under the /alertmanager prefix.
            - source_labels: [__meta_kubernetes_pod_label_app]
              regex: alertmanager
              action: replace
              target_label: __alerts_path__
              replacement: '/alertmanager/api/v2/alerts'
            # Explicit empty regex: drop targets whose container port number
            # label is empty.
            - source_labels: [__meta_kubernetes_pod_container_port_number]
              regex: ''
              action: drop

    scrape_configs:
      # Kubelet metrics, reached through the API server node proxy path.
      - job_name: kubelet
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      - job_name: kube-state-metrics
        honor_timestamps: true
        scheme: http
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - separator: ;
            regex: __meta_kubernetes_service_label_(.+)
            replacement: $1
            action: labelmap
          - source_labels: [__meta_kubernetes_service_label_k8s_app]
            separator: ;
            regex: kube-state-metrics
            replacement: $1
            action: keep
        metric_relabel_configs:
          # Copy the number from a prombench-<number> namespace into prNum on
          # kube_namespace_created; the benchmarkTestsRunning alert reads it.
          - action: replace
            source_labels: [__name__, namespace]
            regex: kube_namespace_created;prombench-(\d+)
            target_label: prNum
            replacement: $1

      # cAdvisor metrics, reached through the API server node proxy path.
      - job_name: cadvisor
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      - job_name: endpoints
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # Keep only endpoints whose service carries one of these app labels.
          - action: keep
            source_labels: [__meta_kubernetes_service_label_app]
            regex: prometheus|prometheus-meta|alertmanager|node-exporter|loadgen-querier
          - action: replace
            source_labels: [__meta_kubernetes_service_label_app]
            target_label: job
          - action: replace
            source_labels: [__meta_kubernetes_namespace]
            target_label: namespace
          - action: replace
            source_labels: [__meta_kubernetes_service_label_prometheus]
            target_label: prometheus
          - action: replace
            source_labels: [__meta_kubernetes_pod_node_name]
            target_label: nodeName
          - action: replace
            source_labels: [__meta_kubernetes_pod_label_node]
            target_label: node
          # The rules below rewrite the metrics path for targets matched by
          # namespace plus service label; unmatched targets keep the default.
          - action: replace
            source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_label_prometheus]
            regex: prombench-(\d+);test-pr-\d+
            target_label: __metrics_path__
            replacement: /${1}/prometheus-pr/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_label_prometheus]
            regex: prombench-(\d+);test-(?:master|main|v.+|release-.+)
            target_label: __metrics_path__
            replacement: /${1}/prometheus-release/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_label_prometheus]
            regex: default;meta
            target_label: __metrics_path__
            replacement: /prometheus-meta/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_label_app]
            regex: default;alertmanager
            target_label: __metrics_path__
            replacement: /alertmanager/metrics
---
# Single-replica Prometheus server that mounts the two ConfigMaps and the
# PersistentVolumeClaim defined in this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-meta
  labels:
    app: prometheus-meta
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-meta
      prometheus: meta
  template:
    metadata:
      labels:
        app: prometheus-meta
        prometheus: meta
    spec:
      serviceAccountName: prometheus
      securityContext:
        runAsUser: 0
      containers:
        - image: "{{ .PROMETHEUS_IMAGE_REPOSITORY }}:{{ .PROMETHEUS_IMAGE_VERSION }}"
          args:
            - "--config.file=/etc/prometheus/config/prometheus.yaml"
            - "--storage.tsdb.path=/data"
            # 50% of the total storage available (see the PVC request size).
            - "--storage.tsdb.retention.size=500GB"
            - "--web.enable-lifecycle"
            - "--web.external-url=http://{{ .DOMAIN_NAME }}/prometheus-meta"
            # JSON log format is needed for GKE to display log levels correctly.
            - "--log.format=json"
          name: prometheus
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus/config
            - name: alert-rules
              mountPath: /etc/prometheus/alerts
            - name: storage
              mountPath: /data
              subPath: prometheus-data
          ports:
            - name: prom-web
              containerPort: 9090
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-meta
        - name: alert-rules
          configMap:
            name: alert-rules
        - name: storage
          persistentVolumeClaim:
            claimName: prometheus-meta
      terminationGracePeriodSeconds: 300
      nodeSelector:
        node-name: main-node
---
# Exposes the prom-web container port of the prometheus-meta pods on port 80.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-meta
  labels:
    prometheus: meta
    app: prometheus-meta
spec:
  type: NodePort
  ports:
    - name: prom-web
      port: 80
      targetPort: prom-web
  selector:
    app: prometheus-meta
    prometheus: meta
---
# Routes the /prometheus-meta path prefix to the prometheus-meta Service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ingress-prometheus-meta
  annotations:
    kubernetes.io/ingress.class: "nginx"
    nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
  rules:
    - http:
        paths:
          - backend:
              service:
                name: prometheus-meta
                port:
                  name: prom-web
            path: /prometheus-meta
            pathType: Prefix