build/k8s/collector.yaml (361 lines of code) (raw):

---
# Namespace that holds every namespaced resource declared in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: adx-mon
---
# Identity shared by the collector DaemonSet and the collector-singleton
# Deployment (both set serviceAccountName: collector).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: collector
  namespace: adx-mon
---
# Read-only cluster permissions bound to the collector service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: adx-mon:collector
rules:
  # get on node subresources; presumably required for the kubelet scrapes
  # (port 10250) configured in collector-config.
  - apiGroups:
      - ""
    resources:
      - nodes/metrics
      - nodes/proxy
    verbs:
      - get
  # Read and watch namespaces and pods.
  - apiGroups:
      - ""
    resources:
      - namespaces
      - pods
    verbs:
      - get
      - list
      - watch
  # Non-resource /metrics URL; collector-singleton-config scrapes
  # https://kubernetes.default.svc/metrics.
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
# Grants the ClusterRole above to the collector service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: adx-mon:collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: adx-mon:collector
subjects:
  - kind: ServiceAccount
    name: collector
    namespace: adx-mon
---
# Configuration for the per-node collector DaemonSet. The DaemonSet mounts this
# ConfigMap at /etc/config and starts the collector with
# --config=/etc/config/config.toml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: collector-config
  namespace: adx-mon
data:
  config.toml: |
    # Ingestor URL to send collected telemetry.
    endpoint = 'https://ingestor.adx-mon.svc.cluster.local'

    # Region is a location identifier
    region = '$REGION'

    # Skip TLS verification.
    insecure-skip-verify = true

    # Address to listen on for endpoints.
    listen-addr = ':8080'

    # Maximum number of connections to accept.
    max-connections = 100

    # Maximum number of samples to send in a single batch.
    max-batch-size = 10000

    # Storage directory for the WAL.
    storage-dir = '/mnt/data'

    # Regexes of metrics to drop from all sources.
    drop-metrics = []

    # Disable metrics forwarding to endpoints.
    disable-metrics-forwarding = false

    # WAL flush interval in milliseconds. For collector it's lowered to reduce CPU usage since
    # fewer metrics are in flight.
    wal-flush-interval-ms = 1000

    lift-labels = [
      { name = 'host' },
      { name = 'cluster' },
      { name = 'adxmon_pod', column = 'Pod' },
      { name = 'adxmon_namespace', column = 'Namespace' },
      { name = 'adxmon_container', column = 'Container' },
    ]

    # Resources to lift, declared with the same name/column entries as lift-labels.
    lift-resources = [
      { name = 'host' },
      { name = 'cluster' },
      { name = 'adxmon_pod', column = 'Pod' },
      { name = 'adxmon_namespace', column = 'Namespace' },
      { name = 'adxmon_container', column = 'Container' },
    ]

    # Key/value pairs of labels to add to all metrics and logs.
    [add-labels]
      host = '$(HOSTNAME)'
      cluster = '$CLUSTER'

    # Defines a prometheus scrape endpoint.
    [prometheus-scrape]

      # Database to store metrics in.
      database = 'Metrics'

      default-drop-metrics = false

      # Defines a static scrape target.
      static-scrape-target = [
        # Scrape our own metrics
        { host-regex = '.*', url = 'http://$(HOSTNAME):3100/metrics', namespace = 'adx-mon', pod = 'collector', container = 'collector' },

        # Scrape kubelet metrics
        # { host-regex = '.*', url = 'https://$(HOSTNAME):10250/metrics', namespace = 'kube-system', pod = 'kubelet', container = 'kubelet' },

        # Scrape cadvisor metrics
        { host-regex = '.*', url = 'https://$(HOSTNAME):10250/metrics/cadvisor', namespace = 'kube-system', pod = 'kubelet', container = 'cadvisor' },

        # Scrape kubelet resource metrics
        { host-regex = '.*', url = 'https://$(HOSTNAME):10250/metrics/resource', namespace = 'kube-system', pod = 'kubelet', container = 'resource' },
      ]

      # Scrape interval in seconds.
      scrape-interval = 30

      # Scrape timeout in seconds.
      scrape-timeout = 25

      # Disable metrics forwarding to endpoints.
      disable-metrics-forwarding = false

      # Regexes of metrics to keep from scraping source.
      keep-metrics = []

      # Regexes of metrics to drop from scraping source.
      drop-metrics = []

    # Defines a prometheus remote write endpoint.
    [[prometheus-remote-write]]

      # Database to store metrics in.
      database = 'Metrics'

      # The path to listen on for prometheus remote write requests. Defaults to /receive.
      path = '/receive'

      # Regexes of metrics to drop.
      drop-metrics = []

      # Disable metrics forwarding to endpoints.
      disable-metrics-forwarding = false

      # Key/value pairs of labels to add to this source.
      [prometheus-remote-write.add-labels]

    # Defines an OpenTelemetry log endpoint.
    [otel-log]
      # Attributes lifted from the Body and added to Attributes.
      lift-attributes = ['kusto.database', 'kusto.table']

    [[host-log]]
      parsers = ['json']

      journal-target = [
        # matches are optional and are parsed like MATCHES in journalctl.
        # If different fields are matched, only entries matching all terms are included.
        # If the same fields are matched, entries matching any term are included.
        # + can be added between to include a disjunction of terms.
        # See examples under man 1 journalctl
        { matches = [ '_SYSTEMD_UNIT=kubelet.service' ], database = 'Logs', table = 'Kubelet' }
      ]
---
# Per-node collector. Runs one pod per schedulable node (including
# control-plane nodes, via the tolerations below) using collector-config.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: collector
  namespace: adx-mon
spec:
  selector:
    matchLabels:
      adxmon: collector
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      # Old pods are removed before replacements start (no surge), at most
      # 30% of nodes at a time.
      maxSurge: 0
      maxUnavailable: 30%
  template:
    metadata:
      labels:
        adxmon: collector
      annotations:
        adx-mon/scrape: "true"
        # NOTE(review): the only declared containerPort is 8080 (matching
        # listen-addr ':8080'); confirm something serves /metrics on 9091.
        adx-mon/port: "9091"
        adx-mon/path: "/metrics"
        adx-mon/log-destination: "Logs:Collector"
        adx-mon/log-parsers: json
    spec:
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      serviceAccountName: collector
      containers:
        - name: collector
          image: "ghcr.io/azure/adx-mon/collector:latest"
          command:
            - /collector
          args:
            - "--config=/etc/config/config.toml"
            # $(HOSTNAME) refers to the HOSTNAME env var defined below.
            - "--hostname=$(HOSTNAME)"
          ports:
            # Host port 3100 maps to the listener on 8080; the "own metrics"
            # static scrape target in collector-config uses port 3100.
            - containerPort: 8080
              protocol: TCP
              hostPort: 3100
          env:
            - name: LOG_LEVEL
              value: INFO
            # Node name, not the pod name.
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Go runtime debug setting: disables the net/http HTTP/2 client.
            - name: "GODEBUG"
              value: "http2client=0"
          volumeMounts:
            - mountPath: /etc/ssl/certs
              name: ssl-certs
              readOnly: true
            - mountPath: /etc/pki/ca-trust/extracted
              name: etc-pki-ca-certs
              readOnly: true
            - name: config-volume
              mountPath: /etc/config
            # Backs storage-dir = '/mnt/data' from the config.
            - name: storage
              mountPath: /mnt/data
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
            - name: etcmachineid
              mountPath: /etc/machine-id
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 100Mi
            limits:
              cpu: 500m
              memory: 2000Mi
      volumes:
        - name: ssl-certs
          hostPath:
            path: /etc/ssl/certs
            type: Directory
        - name: etc-pki-ca-certs
          hostPath:
            path: /etc/pki/ca-trust/extracted
            type: DirectoryOrCreate
        - name: config-volume
          configMap:
            # Provide the name of the ConfigMap containing the files you want
            # to add to the container
            name: collector-config
        - name: storage
          hostPath:
            path: /mnt/collector
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
        - name: etcmachineid
          hostPath:
            path: /etc/machine-id
            type: File
---
# Configuration for the single-replica collector Deployment. It scrapes only
# the API server endpoint and turns dynamic target discovery off.
apiVersion: v1
kind: ConfigMap
metadata:
  name: collector-singleton-config
  namespace: adx-mon
data:
  config.toml: |
    # Ingestor URL to send collected telemetry.
    endpoint = 'https://ingestor.adx-mon.svc.cluster.local'

    # Region is a location identifier
    region = '$REGION'

    # Skip TLS verification.
    insecure-skip-verify = true

    # Address to listen on for endpoints.
    listen-addr = ':8080'

    # Maximum number of connections to accept.
    max-connections = 100

    # Maximum number of samples to send in a single batch.
    max-batch-size = 10000

    # Storage directory for the WAL.
    storage-dir = '/mnt/data'

    # Regexes of metrics to drop from all sources.
    drop-metrics = []

    # Disable metrics forwarding to endpoints.
    disable-metrics-forwarding = false

    # WAL flush interval in milliseconds. For collector it's lowered to reduce CPU usage since
    # fewer metrics are in flight.
    wal-flush-interval-ms = 1000

    lift-labels = [
      { name = 'host' },
      { name = 'cluster' },
      { name = 'adxmon_pod', column = 'Pod' },
      { name = 'adxmon_namespace', column = 'Namespace' },
      { name = 'adxmon_container', column = 'Container' },
    ]

    # Resources to lift, declared with the same name/column entries as lift-labels.
    lift-resources = [
      { name = 'host' },
      { name = 'cluster' },
      { name = 'adxmon_pod', column = 'Pod' },
      { name = 'adxmon_namespace', column = 'Namespace' },
      { name = 'adxmon_container', column = 'Container' },
    ]

    # Key/value pairs of labels to add to all metrics.
    [add-labels]
      host = '$(HOSTNAME)'
      cluster = '$CLUSTER'

    # Defines a prometheus scrape endpoint.
    [prometheus-scrape]

      # Database to store metrics in.
      database = 'Metrics'

      default-drop-metrics = false

      # Defines a static scrape target.
      static-scrape-target = [
        # Scrape api server endpoint
        { host-regex = '.*', url = 'https://kubernetes.default.svc/metrics', namespace = 'kube-system', pod = 'kube-apiserver', container = 'kube-apiserver' },
      ]

      # Scrape interval in seconds.
      scrape-interval = 30

      # Scrape timeout in seconds.
      scrape-timeout = 25

      # Disable dynamic discovery of scrape targets.
      disable-discovery = true

      # Disable metrics forwarding to endpoints.
      disable-metrics-forwarding = false

      # Regexes of metrics to keep from scraping source.
      keep-metrics = []

      # Regexes of metrics to drop from scraping source.
      drop-metrics = []
---
# Cluster-wide singleton collector (one replica) using
# collector-singleton-config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: collector-singleton
  namespace: adx-mon
spec:
  replicas: 1
  # NOTE(review): this selector and the pod labels below are identical to the
  # collector DaemonSet's (adxmon: collector). Kubernetes advises against
  # overlapping selectors across controllers. spec.selector is immutable on
  # apps/v1 Deployments, so changing it means recreating the Deployment.
  selector:
    matchLabels:
      adxmon: collector
  template:
    metadata:
      labels:
        adxmon: collector
      annotations:
        adx-mon/scrape: "true"
        adx-mon/port: "9091"
        adx-mon/path: "/metrics"
        adx-mon/log-destination: "Logs:Collector"
        adx-mon/log-parsers: json
    spec:
      tolerations:
        - key: CriticalAddonsOnly
          operator: Exists
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      serviceAccountName: collector
      containers:
        - name: collector
          image: "ghcr.io/azure/adx-mon/collector:latest"
          command:
            - /collector
          args:
            - "--config=/etc/config/config.toml"
            # $(HOSTNAME) refers to the HOSTNAME env var defined below.
            - "--hostname=$(HOSTNAME)"
          env:
            - name: LOG_LEVEL
              value: INFO
            # Node name, not the pod name.
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Go runtime debug setting: disables the net/http HTTP/2 client.
            - name: "GODEBUG"
              value: "http2client=0"
          volumeMounts:
            - mountPath: /etc/ssl/certs
              name: ssl-certs
              readOnly: true
            - mountPath: /etc/pki/ca-trust/extracted
              name: etc-pki-ca-certs
              readOnly: true
            - name: config-volume
              mountPath: /etc/config
            # Backs storage-dir = '/mnt/data' from the config.
            - name: storage
              mountPath: /mnt/data
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 100Mi
            limits:
              cpu: 500m
              memory: 2000Mi
      volumes:
        - name: ssl-certs
          hostPath:
            path: /etc/ssl/certs
            type: Directory
        - name: etc-pki-ca-certs
          hostPath:
            path: /etc/pki/ca-trust/extracted
            type: DirectoryOrCreate
        - name: config-volume
          configMap:
            name: collector-singleton-config
        # NOTE(review): same host path as the DaemonSet's storage volume, so
        # this pod and the DaemonSet pod on the same node share
        # /mnt/collector. Confirm that sharing is intended.
        - name: storage
          hostPath:
            path: /mnt/collector
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers