charts/operator/templates/collector.yaml (161 lines of code) (raw):
{{- /*
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/}}
# Collector DaemonSet. Its namespace comes from the chart value
# namespace.system; labels are only rendered when commonLabels is set.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: collector
  # Quoted so an empty or boolean/number-looking value still renders as a string.
  namespace: {{ .Values.namespace.system | quote }}
  {{- if .Values.commonLabels }}
  labels:
    {{- include "prometheus-engine.collector.labels" . | nindent 4 }}
  {{- end }}
# DaemonSet spec: one pod per eligible node, made of a config-init init
# container plus the config-reloader and prometheus containers, which share
# the config-out emptyDir volume.
spec:
  selector:
    matchLabels:
      # DO NOT MODIFY - label selectors are immutable by the Kubernetes API.
      # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector.
      {{- include "prometheus-engine.collector.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "prometheus-engine.collector.templateLabels" . | nindent 8 }}
      annotations:
        # The emptyDir for the storage and config directories prevents cluster
        # autoscaling unless this annotation is set.
        cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
        components.gke.io/component-name: managed_prometheus
    spec:
      serviceAccountName: {{ include "prometheus-engine.collector.serviceAccountName" . }}
      automountServiceAccountToken: true
      priorityClassName: gmp-critical
      initContainers:
      # Creates an empty config.yaml in the shared config-out volume.
      # NOTE(review): presumably so the file exists before the main containers
      # start reading it - confirm.
      - name: config-init
        image: "{{ .Values.images.bash.image }}:{{ .Values.images.bash.tag }}"
        command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml']
        volumeMounts:
        - name: config-out
          mountPath: /prometheus/config_out
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            # Upper-case ALL is the spelling used by the Pod Security Standards.
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      containers:
      # Reads the mounted ConfigMap config, writes the result to config-out and
      # calls the Prometheus reload endpoint on 127.0.0.1:19090.
      - name: config-reloader
        image: "{{ .Values.images.configReloader.image }}:{{ .Values.images.configReloader.tag }}"
        args:
        - --config-file=/prometheus/config/config.yaml
        - --config-file-output=/prometheus/config_out/config.yaml
        - --reload-url=http://127.0.0.1:19090/-/reload
        - --ready-url=http://127.0.0.1:19090/-/ready
        - --listen-address=:19091
        ports:
        - name: cfg-rel-metrics
          containerPort: 19091
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        # NOTE(review): this reuses the "bash" resource values for the
        # config-reloader container - confirm that this is intentional.
        resources: {{- toYaml $.Values.resources.bash | nindent 10 }}
        volumeMounts:
        - name: config
          readOnly: true
          mountPath: /prometheus/config
        - name: config-out
          mountPath: /prometheus/config_out
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            # Upper-case ALL is the spelling used by the Pod Security Standards.
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      # Prometheus itself; serves on :19090 and reads the config produced by
      # config-reloader from the read-only config-out mount.
      - name: prometheus
        image: "{{ .Values.images.prometheus.image }}:{{ .Values.images.prometheus.tag }}"
        args:
        - --config.file=/prometheus/config_out/config.yaml
        - --enable-feature=exemplar-storage
        # Special Google flag for authorization using native Kubernetes secrets.
        - --enable-feature=google-kubernetes-secret-provider
        - --storage.tsdb.path=/prometheus/data
        - --storage.tsdb.no-lockfile
        # Special Google flag for force deleting all data on start. We use ephemeral storage in
        # this manifest, but there are cases where a container restart still reuses potentially
        # bad data (corrupted, with high cardinality causing OOMs or slow startups).
        # Force deleting, so container restart is consistent with pod restart.
        # NOTE: Data is likely already sent to GCM, plus GCM export does not use that
        # data on disk (WAL).
        - --gmp.storage.delete-data-on-start
        # Keep 30 minutes of data. As we are backed by an emptyDir volume, this will count towards
        # the container's memory usage. We could lower it further if this becomes problematic, but
        # the window for local data is quite convenient for debugging.
        - --storage.tsdb.retention.time=30m
        - --storage.tsdb.wal-compression
        # Effectively disable compaction and make blocks short enough so that our retention window
        # can be kept in practice.
        - --storage.tsdb.min-block-duration=10m
        - --storage.tsdb.max-block-duration=10m
        - --web.listen-address=:19090
        - --web.enable-lifecycle
        - --web.route-prefix=/
        - --export.user-agent-mode=kubectl
        # JSON log format is needed for GKE to display log levels correctly.
        - --log.format=json
        ports:
        - name: prom-metrics
          containerPort: 19090
        # The environment variable EXTRA_ARGS will be populated by the operator.
        # DO NOT specify it here.
        env:
        - name: GOGC
          value: "25"
        resources: {{- toYaml $.Values.resources.collector | nindent 10 }}
        volumeMounts:
        - name: storage
          mountPath: /prometheus/data
        - name: config-out
          readOnly: true
          mountPath: /prometheus/config_out
        - name: collection-secret
          readOnly: true
          mountPath: /etc/secrets
        livenessProbe:
          httpGet:
            port: 19090
            path: /-/healthy
            scheme: HTTP
        readinessProbe:
          httpGet:
            port: 19090
            path: /-/ready
            scheme: HTTP
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            # Upper-case ALL is the spelling used by the Pod Security Standards.
            - ALL
          privileged: false
          readOnlyRootFilesystem: true
      volumes:
      - name: storage
        emptyDir: {}
      - name: config
        configMap:
          name: collector
      - name: config-out
        emptyDir: {}
      - name: collection-secret
        secret:
          secretName: collection
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            # A single term: both expressions must match, i.e. Linux nodes on
            # arm64 or amd64.
            - matchExpressions:
              - key: kubernetes.io/arch
                operator: In
                values:
                - arm64
                - amd64
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      tolerations:
      - key: "components.gke.io/gke-managed-components"
        operator: "Exists"
      # Key-less tolerations: tolerate every taint with the given effect.
      - effect: NoExecute
        operator: Exists
      - effect: NoSchedule
        operator: Exists
      # Pod-level security context applied to all containers above.
      securityContext:
        runAsGroup: 1000
        runAsNonRoot: true
        runAsUser: 1000
        seccompProfile:
          type: RuntimeDefault