charts/operator/templates/collector.yaml (161 lines of code) (raw):

{{- /*
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/}}
# DaemonSet "collector": each pod runs a Prometheus container together with a
# config-reloader sidecar. An init container pre-creates the generated config
# file in an emptyDir volume shared by all three containers.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: collector
  namespace: {{.Values.namespace.system}}
  {{- if .Values.commonLabels }}
  labels:
    {{- include "prometheus-engine.collector.labels" . | nindent 4 }}
  {{- end }}
spec:
  selector:
    matchLabels:
      # DO NOT MODIFY - label selectors are immutable by the Kubernetes API.
      # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector.
      {{- include "prometheus-engine.collector.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "prometheus-engine.collector.templateLabels" . | nindent 8 }}
      annotations:
        # The emptyDir for the storage and config directories prevents cluster
        # autoscaling unless this annotation is set.
        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
        components.gke.io/component-name: managed_prometheus
    spec:
      serviceAccountName: {{ include "prometheus-engine.collector.serviceAccountName" . }}
      automountServiceAccountToken: true
      priorityClassName: gmp-critical
      initContainers:
      # Creates an empty config.yaml in the shared config-out volume before the
      # main containers start.
      - name: config-init
        image: {{.Values.images.bash.image}}:{{.Values.images.bash.tag}}
        command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml']
        volumeMounts:
        - name: config-out
          mountPath: /prometheus/config_out
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            # "ALL" (upper case) is the spelling the Pod Security Standards
            # "restricted" profile checks for.
            drop:
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      containers:
      # Sidecar: reads the mounted ConfigMap config, writes the result to the
      # shared config-out volume and calls the Prometheus reload endpoint.
      - name: config-reloader
        image: {{.Values.images.configReloader.image}}:{{.Values.images.configReloader.tag}}
        args:
        - --config-file=/prometheus/config/config.yaml
        - --config-file-output=/prometheus/config_out/config.yaml
        - --reload-url=http://127.0.0.1:19090/-/reload
        - --ready-url=http://127.0.0.1:19090/-/ready
        - --listen-address=:19091
        ports:
        - name: cfg-rel-metrics
          containerPort: 19091
        env:
        # Name of the node this pod is scheduled on (downward API).
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        # NOTE(review): this container reads .Values.resources.bash rather than
        # a config-reloader-specific key - confirm that is the intended entry.
        resources: {{- toYaml $.Values.resources.bash | nindent 10}}
        volumeMounts:
        - name: config
          readOnly: true
          mountPath: /prometheus/config
        - name: config-out
          mountPath: /prometheus/config_out
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      - name: prometheus
        image: {{.Values.images.prometheus.image}}:{{.Values.images.prometheus.tag}}
        args:
        - --config.file=/prometheus/config_out/config.yaml
        - --enable-feature=exemplar-storage
        # Special Google flag for authorization using native Kubernetes secrets.
        - --enable-feature=google-kubernetes-secret-provider
        - --storage.tsdb.path=/prometheus/data
        - --storage.tsdb.no-lockfile
        # Special Google flag for force deleting all data on start. We use ephemeral storage in
        # this manifest, but there are cases where a container restart still reuses potentially
        # bad data (corrupted, with high cardinality causing OOMs or slow startups).
        # Force deleting, so container restart is consistent with pod restart.
        # NOTE: Data is likely already sent to GCM, plus GCM export does not use that
        # data on disk (WAL).
        - --gmp.storage.delete-data-on-start
        # Keep 30 minutes of data. As we are backed by an emptyDir volume, this will count towards
        # the container's memory usage. We could lower it further if this becomes problematic, but
        # the window for local data is quite convenient for debugging.
        - --storage.tsdb.retention.time=30m
        - --storage.tsdb.wal-compression
        # Effectively disable compaction and make blocks short enough so that our retention window
        # can be kept in practice.
        - --storage.tsdb.min-block-duration=10m
        - --storage.tsdb.max-block-duration=10m
        - --web.listen-address=:19090
        - --web.enable-lifecycle
        - --web.route-prefix=/
        - --export.user-agent-mode=kubectl
        # JSON log format is needed for GKE to display log levels correctly.
        - --log.format=json
        ports:
        - name: prom-metrics
          containerPort: 19090
        # The environment variable EXTRA_ARGS will be populated by the operator.
        # DO NOT specify it here.
        env:
        - name: GOGC
          value: "25"
        resources: {{- toYaml $.Values.resources.collector | nindent 10 }}
        volumeMounts:
        - name: storage
          mountPath: /prometheus/data
        - name: config-out
          readOnly: true
          mountPath: /prometheus/config_out
        - name: collection-secret
          readOnly: true
          mountPath: /etc/secrets
        livenessProbe:
          httpGet:
            port: 19090
            path: /-/healthy
            scheme: HTTP
        readinessProbe:
          httpGet:
            port: 19090
            path: /-/ready
            scheme: HTTP
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      volumes:
      - name: storage
        emptyDir: {}
      - name: config
        configMap:
          name: collector
      - name: config-out
        emptyDir: {}
      - name: collection-secret
        secret:
          secretName: collection
      # Only schedule onto Linux nodes with an arm64 or amd64 architecture.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/arch
                operator: In
                values:
                - arm64
                - amd64
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      # The two key-less "Exists" tolerations match every NoExecute and
      # NoSchedule taint.
      tolerations:
      - key: "components.gke.io/gke-managed-components"
        operator: "Exists"
      - effect: NoExecute
        operator: Exists
      - effect: NoSchedule
        operator: Exists
      securityContext:
        runAsGroup: 1000
        runAsNonRoot: true
        runAsUser: 1000
        seccompProfile:
          type: RuntimeDefault