# attached-logging-monitoring/monitoring/prometheus.yaml (236 lines of code) (raw):

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Manifests in this file (all namespaced objects live in kube-system):
#   1. ServiceAccount / ClusterRole / ClusterRoleBinding "stackdriver-prometheus"
#      - identity of the Prometheus pod; read access to discovery targets.
#   2. ServiceAccount / ClusterRole / ClusterRoleBinding / Secret
#      "stackdriver-prometheus-scrape" - token mounted into Prometheus and
#      authorised to GET the metrics endpoints.
#   3. Service + StatefulSet "stackdriver-prometheus-k8s" - Prometheus server
#      with the stackdriver-prometheus-sidecar container.

---
# [START anthos_monitoring_prometheus_serviceaccount_stackdriver_prometheus]
# Service account the StatefulSet's pod template runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: stackdriver-prometheus
  namespace: kube-system
# [END anthos_monitoring_prometheus_serviceaccount_stackdriver_prometheus]
---
# [START anthos_monitoring_prometheus_clusterrole_stackdriver_user_prometheus]
# Read-only access to the core-API objects listed under `resources`.
# ClusterRoles are cluster-scoped, so no metadata.namespace is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: stackdriver-user:prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
# [END anthos_monitoring_prometheus_clusterrole_stackdriver_user_prometheus]
---
# [START anthos_monitoring_prometheus_clusterrolebinding_stackdriver_user_prometheus]
# Grants the ClusterRole above to the stackdriver-prometheus service account.
# ClusterRoleBindings are cluster-scoped, so no metadata.namespace is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: stackdriver-user:prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: stackdriver-user:prometheus
subjects:
  - kind: ServiceAccount
    name: stackdriver-prometheus
    namespace: kube-system
# [END anthos_monitoring_prometheus_clusterrolebinding_stackdriver_user_prometheus]
---
# [START anthos_monitoring_prometheus_serviceaccount_stackdriver_prometheus_scrape]
# Separate service account whose token (see the Secret below) is mounted into
# the Prometheus container.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: stackdriver-prometheus-scrape
  namespace: kube-system
# [END anthos_monitoring_prometheus_serviceaccount_stackdriver_prometheus_scrape]
---
# [START anthos_monitoring_prometheus_clusterrole_stackdriver_prometheus_scrape]
# GET-only access to nodes/metrics and to the listed non-resource metric URLs.
# ClusterRoles are cluster-scoped, so no metadata.namespace is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: stackdriver-prometheus-scrape
rules:
  - apiGroups:
      - ""
    resources:
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
      - /api/v1/metrics/prometheus
    verbs:
      - get
# [END anthos_monitoring_prometheus_clusterrole_stackdriver_prometheus_scrape]
---
# [START anthos_monitoring_prometheus_clusterrolebinding_stackdriver_prometheus_scrape]
# Grants the scrape ClusterRole to the stackdriver-prometheus-scrape account.
# ClusterRoleBindings are cluster-scoped, so no metadata.namespace is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: stackdriver-prometheus-scrape
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: stackdriver-prometheus-scrape
subjects:
  - kind: ServiceAccount
    name: stackdriver-prometheus-scrape
    namespace: kube-system
# [END anthos_monitoring_prometheus_clusterrolebinding_stackdriver_prometheus_scrape]
---
# [START anthos_monitoring_prometheus_secret_stackdriver_prometheus_scrape]
# Service-account token Secret bound (via the annotation) to the
# stackdriver-prometheus-scrape account; mounted by the StatefulSet as the
# "stackdriver-prometheus-scrape-token" volume.
apiVersion: v1
kind: Secret
metadata:
  name: stackdriver-prometheus-scrape
  namespace: kube-system
  annotations:
    kubernetes.io/service-account.name: stackdriver-prometheus-scrape
type: kubernetes.io/service-account-token
# [END anthos_monitoring_prometheus_secret_stackdriver_prometheus_scrape]
---
# [START anthos_monitoring_prometheus_service_stackdriver_prometheus_k8s]
# Headless service is required for network identity of the pods inside StatefulSet.
# See details at https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations
# NOTE(review): no `clusterIP: None` is set below, so as written this Service
# is not actually headless - confirm intent before changing (clusterIP is
# immutable on an existing Service).
apiVersion: v1
kind: Service
metadata:
  name: stackdriver-prometheus-k8s
  namespace: kube-system
  labels:
    app: stackdriver-prometheus-k8s
spec:
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: 9090
  sessionAffinity: ClientIP
  selector:
    app: stackdriver-prometheus-k8s
# [END anthos_monitoring_prometheus_service_stackdriver_prometheus_k8s]
---
# [START anthos_monitoring_prometheus_statefulset_stackdriver_prometheus_k8s]
# Single-replica Prometheus server plus the Stackdriver sidecar container.
# Both containers share the /data volume; the sidecar is pointed at the
# Prometheus WAL directory under it (/data/wal).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: stackdriver-prometheus-k8s
  namespace: kube-system
  labels:
    app: stackdriver-prometheus-k8s
spec:
  serviceName: "stackdriver-prometheus-k8s"
  replicas: 1
  podManagementPolicy: "Parallel"
  selector:
    matchLabels:
      app: stackdriver-prometheus-k8s
      managed-by: stackdriver
  template:
    metadata:
      labels:
        app: stackdriver-prometheus-k8s
        managed-by: stackdriver
    spec:
      # serviceAccountName is the supported field; `serviceAccount` is only a
      # deprecated alias for it.
      serviceAccountName: stackdriver-prometheus
      securityContext:
        fsGroup: 2000
        runAsUser: 1000
        runAsNonRoot: true
      nodeSelector:
        kubernetes.io/os: linux
      containers:
        - name: prometheus-server
          image: prom/prometheus:v2.18.1
          imagePullPolicy: IfNotPresent
          args:
            - "--config.file=/etc/prometheus/config/prometheus.yaml"
            - "--storage.tsdb.path=/data"
            - "--storage.tsdb.min-block-duration=4h"
            # retention.time is the current spelling of the deprecated
            # --storage.tsdb.retention flag.
            - "--storage.tsdb.retention.time=8h"
          ports:
            - name: prometheus
              containerPort: 9090
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            periodSeconds: 5
            timeoutSeconds: 3
            # Allow up to 10m on startup for data recovery
            # (120 failures x 5s period).
            failureThreshold: 120
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 6
          # Data based on 100 nodes + 3000 pods scalability test
          # Limits removed because of b/147894327#comment37
          resources:
            requests:
              cpu: 250m
              memory: 2000Mi
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus/config
            - name: stackdriver-prometheus-data
              mountPath: /data
            - name: stackdriver-prometheus-scrape-token
              readOnly: true
              mountPath: /var/run/secrets/kubernetes.io/stackdriver-prometheus-scrape
        - name: stackdriver-prometheus-sidecar
          image: gcr.io/stackdriver-prometheus/stackdriver-prometheus-sidecar:0.8.0
          imagePullPolicy: IfNotPresent
          env:
            # Points at the key file provided by the google-cloud-credentials
            # Secret volume mounted below.
            - name: GOOGLE_APPLICATION_CREDENTIALS
              value: /google-cloud-credentials/credentials.json
          args:
            # USER DOES NOT NEED TO CHANGE THESE
            - "--config-file=/etc/sidecar/config.yaml"
            - "--prometheus.wal-directory=/data/wal"
            - "--stackdriver.metrics-prefix=kubernetes.io/anthos"
            - "--stackdriver.generic.namespace=kube-system"
            # USER NEEDS TO UPDATE THESE VALUES BEFORE DEPLOY
            - "--stackdriver.project-id=[PROJECT_ID]"
            - "--stackdriver.kubernetes.location=[CLUSTER_LOCATION]"
            - "--stackdriver.generic.location=[CLUSTER_LOCATION]"
            - "--stackdriver.kubernetes.cluster-name=[CLUSTER_NAME]"
          ports:
            - name: sidecar
              containerPort: 9091
          resources:
            requests:
              cpu: 100m
              memory: 600Mi
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
          volumeMounts:
            - name: sidecar-config-volume
              mountPath: /etc/sidecar
            - name: stackdriver-prometheus-data
              mountPath: /data
            - mountPath: /google-cloud-credentials
              name: google-cloud-credentials
      volumes:
        - name: sidecar-config-volume
          configMap:
            name: stackdriver-prometheus-sidecar-config
        - name: config-volume
          configMap:
            name: stackdriver-prometheus-k8s
        # NOTE(review): a volumeClaimTemplate with this same name is declared
        # below; per the StatefulSet API the claim takes precedence over a
        # same-named pod volume, so this emptyDir only applies if the
        # volumeClaimTemplates section is removed.
        - name: stackdriver-prometheus-data
          emptyDir: {}
        - name: stackdriver-prometheus-scrape-token
          secret:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            secretName: stackdriver-prometheus-scrape
        - name: google-cloud-credentials
          secret:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            secretName: google-cloud-credentials
      tolerations:
        - key: "CriticalAddonsOnly"
          operator: "Exists"
        # NOTE(review): no operator is set here; the Kubernetes default is
        # Equal, which with no value matches only an empty-valued taint -
        # confirm that is the intent.
        - key: node-role.gke.io/observability
          effect: NoSchedule
      terminationGracePeriodSeconds: 300
  volumeClaimTemplates:
    - metadata:
        name: stackdriver-prometheus-data
      spec:
        # Uncomment the storage class matching the platform, or leave all
        # commented out to use the cluster's default storage class.
        # storageClassName: standard #GCP
        # storageClassName: gp2 #AWS EKS
        # storageClassName: default #Azure AKS
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: "40Gi"
# [END anthos_monitoring_prometheus_statefulset_stackdriver_prometheus_k8s]