# prombench/manifests/cluster-infra/3b_prometheus-meta.yaml
# Disk claim for the meta Prometheus. The prometheus-meta Deployment in this
# file mounts it at /data, which is also its TSDB path.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-meta
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      # When this size changes, update the Prometheus meta disk retention
      # settings to match.
      storage: 1000Gi
---
# Alerting rules for the meta Prometheus, shipped as one rules file. The
# prometheus-meta Deployment in this file mounts this ConfigMap under
# /etc/prometheus/alerts.
#
# benchmarkTestsRunning: the expression takes the age of each namespace named
# prombench-<number> (now minus kube_namespace_created), converts it to whole
# days, and matches once that is 3 or more.
#
# Templating: this manifest is rendered before it is applied. The dot-prefixed
# placeholders (GITHUB_ORG, GITHUB_REPO) are substituted at render time. The
# escaped double-brace sequences around $labels.prNum and $value appear
# intended to survive rendering so that Prometheus expands them when the alert
# is evaluated (presumably Go text/template escaping; confirm against the
# renderer).
#
# The org and repo label values are quoted, like prNum, so that whatever the
# placeholders expand to is always read as a YAML string: never a number, a
# boolean, or null on an empty expansion.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alert-rules
data:
  prombench.rules.yml: |
    groups:
      - name: gke-related
        rules:
          - alert: benchmarkTestsRunning
            expr: floor((time() - kube_namespace_created{namespace=~"prombench-[0-9]+"})/(60*60*24)) >= 3
            labels:
              severity: info
              prNum: '{{"{{"}} $labels.prNum {{"}}"}}'
              org: '{{ .GITHUB_ORG }}'
              repo: '{{ .GITHUB_REPO }}'
            annotations:
              description: >
                Benchmark tests are running for {{"{{"}} $value {{"}}"}} days!
                If this is intended ignore this message otherwise you can cancel it by commenting: `/prombench cancel`
---
# Configuration file for the meta Prometheus. The prometheus-meta Deployment
# in this file mounts it under /etc/prometheus/config and passes it via
# --config.file.
#
# Overview of prometheus.yaml:
#   global        - targets are scraped every 30s.
#   rule_files    - loads every *.yml file under /etc/prometheus/alerts.
#   alerting      - discovers Alertmanager through pod service discovery and
#                   keeps only pods in the "default" namespace whose "app"
#                   label is "alertmanager". Alerts are sent to
#                   /alertmanager/api/v2/alerts. The last rule drops
#                   discovered targets whose container port number label is
#                   empty.
#   kubelet,      - one target per node, reached through the API server
#   cadvisor        proxy: the address is rewritten to
#                   kubernetes.default.svc:443 and the path to
#                   /api/v1/nodes/<node>/proxy/metrics (plus /cadvisor for
#                   the cadvisor job).
#                   NOTE(review): insecure_skip_verify is true on these jobs
#                   even though a ca_file is configured - confirm this is
#                   intentional.
#   kube-state-   - keeps services labelled k8s_app=kube-state-metrics. For
#   metrics         the kube_namespace_created series, the number in a
#                   prombench-<number> namespace is copied into the prNum
#                   label.
#   endpoints     - keeps endpoints whose service "app" label is one of
#                   prometheus, prometheus-meta, alertmanager, node-exporter,
#                   loadgen-querier. It sets the job, namespace, prometheus,
#                   nodeName and node labels, then picks the metrics path
#                   from the namespace plus the service label.
#
# The drop rule under "alerting" spells its regex as an explicit empty string.
# A bare "regex:" is a YAML null and only behaves the same because Prometheus
# falls back to an empty pattern when the field is unset; the quoted form
# states the intent directly.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-meta
data:
  prometheus.yaml: |
    global:
      scrape_interval: 30s
    rule_files:
      - /etc/prometheus/alerts/*.yml
    alerting:
      alertmanagers:
        - kubernetes_sd_configs:
            - role: pod
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          relabel_configs:
            - source_labels: [__meta_kubernetes_namespace]
              regex: default
              action: keep
            - source_labels: [__meta_kubernetes_pod_label_app]
              regex: alertmanager
              action: keep
            - source_labels: [__meta_kubernetes_pod_label_app]
              regex: alertmanager
              action: replace
              target_label: __alerts_path__
              replacement: '/alertmanager/api/v2/alerts'
            - source_labels: [__meta_kubernetes_pod_container_port_number]
              regex: ""
              action: drop
    scrape_configs:
      - job_name: kubelet
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics
      - job_name: kube-state-metrics
        honor_timestamps: true
        scheme: http
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - separator: ;
            regex: __meta_kubernetes_service_label_(.+)
            replacement: $1
            action: labelmap
          - source_labels: [__meta_kubernetes_service_label_k8s_app]
            separator: ;
            regex: kube-state-metrics
            replacement: $1
            action: keep
        metric_relabel_configs:
          - action: replace
            source_labels: [__name__, namespace]
            regex: kube_namespace_created;prombench-(\d+)
            target_label: prNum
            replacement: $1
      - job_name: cadvisor
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      - job_name: endpoints
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            source_labels: [__meta_kubernetes_service_label_app]
            regex: prometheus|prometheus-meta|alertmanager|node-exporter|loadgen-querier
          - action: replace
            source_labels: [__meta_kubernetes_service_label_app]
            target_label: job
          - action: replace
            source_labels: [__meta_kubernetes_namespace]
            target_label: namespace
          - action: replace
            source_labels: [__meta_kubernetes_service_label_prometheus]
            target_label: prometheus
          - action: replace
            source_labels: [__meta_kubernetes_pod_node_name]
            target_label: nodeName
          - action: replace
            source_labels: [__meta_kubernetes_pod_label_node]
            target_label: node
          - action: replace
            source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_service_label_prometheus]
            regex: prombench-(\d+);test-pr-\d+
            target_label: __metrics_path__
            replacement: /${1}/prometheus-pr/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_service_label_prometheus]
            regex: prombench-(\d+);test-(?:master|main|v.+|release-.+)
            target_label: __metrics_path__
            replacement: /${1}/prometheus-release/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_service_label_prometheus]
            regex: default;meta
            target_label: __metrics_path__
            replacement: /prometheus-meta/metrics
          - action: replace
            source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_service_label_app]
            regex: default;alertmanager
            target_label: __metrics_path__
            replacement: /alertmanager/metrics
---
# Single-replica meta Prometheus. It reads its configuration and alert rules
# from the prometheus-meta and alert-rules ConfigMaps and stores its TSDB on
# the prometheus-meta PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-meta
  labels:
    app: prometheus-meta
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-meta
      prometheus: meta
  template:
    metadata:
      labels:
        app: prometheus-meta
        prometheus: meta
    spec:
      serviceAccountName: prometheus
      # Only schedule onto nodes labelled node-name=main-node.
      nodeSelector:
        node-name: main-node
      terminationGracePeriodSeconds: 300
      # The pod's containers run as UID 0.
      # NOTE(review): presumably required for write access to the mounted
      # data volume - confirm before tightening.
      securityContext:
        runAsUser: 0
      containers:
        - name: prometheus
          # Image repository and version are filled in when the manifest is
          # rendered.
          image: "{{ .PROMETHEUS_IMAGE_REPOSITORY }}:{{ .PROMETHEUS_IMAGE_VERSION }}"
          args:
            - "--config.file=/etc/prometheus/config/prometheus.yaml"
            - "--storage.tsdb.path=/data"
            # 50% of the total storage available.
            - "--storage.tsdb.retention.size=500GB"
            - "--web.enable-lifecycle"
            - "--web.external-url=http://{{ .DOMAIN_NAME }}/prometheus-meta"
            # GKE needs the JSON log format to display log levels correctly.
            - "--log.format=json"
          ports:
            - name: prom-web
              containerPort: 9090
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus/config
            - name: alert-rules
              mountPath: /etc/prometheus/alerts
            # Only the prometheus-data sub-directory of the claim is mounted.
            - name: storage
              mountPath: /data
              subPath: prometheus-data
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-meta
        - name: alert-rules
          configMap:
            name: alert-rules
        - name: storage
          persistentVolumeClaim:
            claimName: prometheus-meta
---
# NodePort Service in front of the meta Prometheus pods. Service port 80
# forwards to the pods' named port prom-web.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-meta
  labels:
    app: prometheus-meta
    prometheus: meta
spec:
  type: NodePort
  # Selects pods carrying both labels, as set on the prometheus-meta
  # Deployment's pod template.
  selector:
    app: prometheus-meta
    prometheus: meta
  ports:
    - name: prom-web
      port: 80
      targetPort: prom-web
---
# Routes every request whose path starts with /prometheus-meta to the
# prometheus-meta Service, addressing its port by name (prom-web).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ingress-prometheus-meta
  annotations:
    # Annotation values must be strings, so "false" stays quoted.
    kubernetes.io/ingress.class: "nginx"
    nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
  rules:
    - http:
        paths:
          - path: /prometheus-meta
            pathType: Prefix
            backend:
              service:
                name: prometheus-meta
                port:
                  name: prom-web