logging.tf (292 lines of code) (raw):

# Default Helm values for the logging stack. Each string is only a fallback:
# it is used when the corresponding `helm_chart_values` attribute of
# var.cluster_logging / var.cluster_logging_collector is unset, null or empty.
#
# The heredocs are passed through verbatim. `{{ ... }}` sequences are Helm
# template syntax, not Terraform interpolation, so Terraform leaves them alone.
locals {
  # Fallback values for the Loki chart (module "cluster_logging").
  cluster_logging_default_values = <<VALUES
spec:
  loki:
    commonConfig:
      replication_factor: 1
    storage:
      type: 'filesystem'
    auth_enabled: false
    limits_config:
      enforce_metric_name: true
      reject_old_samples: true
      reject_old_samples_max_age: 72h
      max_cache_freshness_per_query: 10m
      split_queries_by_interval: 15m
    config: |
      {{- if .Values.enterprise.enabled }}
      {{- tpl .Values.enterprise.config . }}
      {{- else }}
      auth_enabled: {{ .Values.loki.auth_enabled }}
      {{- end }}
      {{- with .Values.loki.server }}
      server:
        {{- toYaml . | nindent 2}}
      {{- end}}
      memberlist:
        join_members:
          - {{ include "loki.memberlist" . }}
          {{- with .Values.migrate.fromDistributed }}
          {{- if .enabled }}
          - {{ .memberlistService }}
          {{- end }}
          {{- end }}
      {{- with .Values.loki.ingester }}
      ingester:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      {{- if .Values.loki.commonConfig}}
      common:
      {{- toYaml .Values.loki.commonConfig | nindent 2}}
        storage:
        {{- include "loki.commonStorageConfig" . | nindent 4}}
      {{- end}}
      {{- with .Values.loki.limits_config }}
      limits_config:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      runtime_config:
        file: /etc/loki/runtime-config/runtime-config.yaml
      {{- with .Values.loki.memcached.chunk_cache }}
      {{- if and .enabled (or .host .addresses) }}
      chunk_store_config:
        chunk_cache_config:
          memcached:
            batch_size: {{ .batch_size }}
            parallelism: {{ .parallelism }}
          memcached_client:
            {{- if .host }}
            host: {{ .host }}
            {{- end }}
            {{- if .addresses }}
            addresses: {{ .addresses }}
            {{- end }}
            service: {{ .service }}
      {{- end }}
      {{- end }}
      {{- if .Values.loki.schemaConfig}}
      schema_config:
      {{- toYaml .Values.loki.schemaConfig | nindent 2}}
      {{- else }}
      schema_config:
        configs:
          - from: 2022-01-11
            store: boltdb-shipper
            object_store: {{ .Values.loki.storage.type }}
            schema: v12
            index:
              prefix: loki_index_
              period: 24h
      {{- end }}
      {{ include "loki.rulerConfig" . }}
      table_manager:
        retention_deletes_enabled: true
        retention_period: 72h
      {{- with .Values.loki.memcached.results_cache }}
      query_range:
        align_queries_with_step: true
        {{- if and .enabled (or .host .addresses) }}
        cache_results: {{ .enabled }}
        results_cache:
          cache:
            default_validity: {{ .default_validity }}
            memcached_client:
              {{- if .host }}
              host: {{ .host }}
              {{- end }}
              {{- if .addresses }}
              addresses: {{ .addresses }}
              {{- end }}
              service: {{ .service }}
              timeout: {{ .timeout }}
        {{- end }}
      {{- end }}
      {{- with .Values.loki.storage_config }}
      storage_config:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      {{- with .Values.loki.query_scheduler }}
      query_scheduler:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      {{- with .Values.loki.compactor }}
      compactor:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      {{- with .Values.loki.analytics }}
      analytics:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
      {{- with .Values.loki.querier }}
      querier:
        {{- tpl (. | toYaml) $ | nindent 4 }}
      {{- end }}
  write:
    persistence:
      enableStatefulSetAutoDeletePVC: true
      size: 10Gi
      storageClass: standard
  read:
    persistence:
      enableStatefulSetAutoDeletePVC: true
      size: 10Gi
      storageClass: standard
  backend:
    persistence:
      enableStatefulSetAutoDeletePVC: true
      size: 10Gi
      storageClass: standard
  singleBinary:
    replicas: 1
    persistence:
      enableStatefulSetAutoDeletePVC: true
      enabled: true
      size: 10Gi
      storageClass: standard
  extraObjects:
    - apiVersion: monitoring.coreos.com/v1
      kind: PrometheusRule
      metadata:
        name: loki-custom-alerting-rules
        namespace: "{{ .Release.Namespace }}"
      spec:
        groups:
          - name: loki_custom_example_rules
            rules:
              - alert: ExampleCustomAlertForLoki
                expr: sum(count_over_time({app="loki"}[1m:1h])) > 0
                for: 3m
                labels:
                  severity: warning
                  category: logs
                  cluster: kube-loki
                  message: "loki has encountered errors"
VALUES

  # Fallback values for the Promtail chart (module "cluster_logging_collector").
  # The client URL points at the `loki-headless` service on port 3100.
  cluster_logging_collector_default_values = <<VALUES
spec:
  daemonset:
    enabled: true
  serviceMonitor:
    enabled: true
  config:
    logLevel: info
    serverPort: 3101
    clients:
      - url: http://loki-headless:3100/loki/api/v1/push
    snippets:
      pipelineStages:
        - cri: {}
      common:
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_node_name
          target_label: node_name
        - action: replace
          source_labels:
            - __meta_kubernetes_namespace
          target_label: namespace
        - action: replace
          replacement: $1
          separator: /
          source_labels:
            - namespace
            - app
          target_label: job
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_name
          target_label: pod
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_container_name
          target_label: container
        - action: replace
          replacement: /var/log/pods/*$1/*.log
          separator: /
          source_labels:
            - __meta_kubernetes_pod_uid
            - __meta_kubernetes_pod_container_name
          target_label: __path__
        - action: replace
          replacement: /var/log/pods/*$1/*.log
          regex: true/(.*)
          separator: /
          source_labels:
            - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
            - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
            - __meta_kubernetes_pod_container_name
          target_label: __path__
      scrapeConfigs: |
        # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
        - job_name: kubernetes-pods
          pipeline_stages:
            {{- toYaml .Values.config.snippets.pipelineStages | nindent 4 }}
          kubernetes_sd_configs:
            - role: pod
          relabel_configs:
            - source_labels:
                - __meta_kubernetes_pod_controller_name
              regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
              action: replace
              target_label: __tmp_controller_name
            - source_labels:
                - __meta_kubernetes_pod_label_app_kubernetes_io_name
                - __meta_kubernetes_pod_label_app
                - __tmp_controller_name
                - __meta_kubernetes_pod_name
              regex: ^;*([^;]+)(;.*)?$
              action: replace
              target_label: app
            - source_labels:
                - __meta_kubernetes_pod_label_app_kubernetes_io_instance
                - __meta_kubernetes_pod_label_release
              regex: ^;*([^;]+)(;.*)?$
              action: replace
              target_label: instance
            - source_labels:
                - __meta_kubernetes_pod_label_app_kubernetes_io_component
                - __meta_kubernetes_pod_label_component
              regex: ^;*([^;]+)(;.*)?$
              action: replace
              target_label: component
            {{- if .Values.config.snippets.addScrapeJobLabel }}
            - replacement: kubernetes-pods
              target_label: scrape_job
            {{- end }}
            {{- toYaml .Values.config.snippets.common | nindent 4 }}
            {{- with .Values.config.snippets.extraRelabelConfigs }}
            {{- toYaml . | nindent 4 }}
            {{- end }}
VALUES
}

# Loki log store. Instantiated only when var.cluster_logging_create is true.
#
# Every input follows the same pattern, try(coalesce(<override>, <default>), <default>):
#   * coalesce() falls back to the default when the override is null or "";
#   * try() falls back to the default when the override cannot be evaluated at
#     all (for example var.cluster_logging is null or lacks the attribute).
# For inputs whose default is null, coalesce(null, null) raises an error and
# the surrounding try() turns that into null.
module "cluster_logging" {
  count  = var.cluster_logging_create ? 1 : 0
  source = "./modules/feature-grafana-loki"

  cluster_logging_helm_chart_repository          = try(coalesce(var.cluster_logging.helm_chart_repository, "oci://public.registry.jetbrains.space/p/helm/library"), "oci://public.registry.jetbrains.space/p/helm/library")
  cluster_logging_helm_chart_repository_config   = try(coalesce(var.cluster_logging.helm_chart_repository_config, null), null)
  cluster_logging_helm_chart_version             = try(coalesce(var.cluster_logging.helm_chart_version, "5.43.3"), "5.43.3")
  cluster_logging_helm_chart_name                = try(coalesce(var.cluster_logging.helm_chart_name, "kube-grafana-loki"), "kube-grafana-loki")
  cluster_logging_namespace                      = try(coalesce(var.cluster_logging.helm_chart_namespace, "kube-monitoring"), "kube-monitoring")
  cluster_logging_create_namespace_if_not_exists = try(coalesce(var.cluster_logging.create_namespace_if_not_exists, true), true)
  cluster_logging_default_values_dot_yaml        = try(coalesce(var.cluster_logging.helm_chart_values, local.cluster_logging_default_values), local.cluster_logging_default_values)
  cluster_logging_params                         = try(coalesce(var.cluster_logging.helm_chart_params, []), [])
  cluster_logging_secrets                        = try(coalesce(var.cluster_logging.helm_chart_secrets, []), [])

  # Applied only after the monitoring module.
  depends_on = [
    module.cluster_monitoring
  ]
}

# Promtail log collector. Gated by the same var.cluster_logging_create flag as
# the Loki module above, and uses the same try(coalesce(...)) fallback pattern,
# reading its overrides from var.cluster_logging_collector.
module "cluster_logging_collector" {
  count  = var.cluster_logging_create ? 1 : 0
  source = "./modules/feature-grafana-promtail"

  cluster_logging_collector_helm_chart_repository        = try(coalesce(var.cluster_logging_collector.helm_chart_repository, "oci://public.registry.jetbrains.space/p/helm/library"), "oci://public.registry.jetbrains.space/p/helm/library")
  cluster_logging_collector_helm_chart_repository_config = try(coalesce(var.cluster_logging_collector.helm_chart_repository_config, null), null)
  cluster_logging_collector_helm_chart_version           = try(coalesce(var.cluster_logging_collector.helm_chart_version, "6.15.5"), "6.15.5")

  # The default chart name used to be "kube-grafana-loki", copied from the Loki
  # module; with it the collector would resolve the Loki chart while passing
  # Promtail values and the Promtail version pin.
  # NOTE(review): "kube-grafana-promtail" is assumed from the
  # kube-grafana-<component> naming of the Loki chart - confirm that this chart
  # name exists in the registry.
  cluster_logging_collector_helm_chart_name = try(coalesce(var.cluster_logging_collector.helm_chart_name, "kube-grafana-promtail"), "kube-grafana-promtail")

  cluster_logging_collector_namespace                      = try(coalesce(var.cluster_logging_collector.helm_chart_namespace, "kube-monitoring"), "kube-monitoring")
  cluster_logging_collector_create_namespace_if_not_exists = try(coalesce(var.cluster_logging_collector.create_namespace_if_not_exists, true), true)
  cluster_logging_collector_default_values_dot_yaml        = try(coalesce(var.cluster_logging_collector.helm_chart_values, local.cluster_logging_collector_default_values), local.cluster_logging_collector_default_values)
  cluster_logging_collector_params                         = try(coalesce(var.cluster_logging_collector.helm_chart_params, []), [])
  cluster_logging_collector_secrets                        = try(coalesce(var.cluster_logging_collector.helm_chart_secrets, []), [])

  # Applied only after the monitoring module.
  depends_on = [
    module.cluster_monitoring
  ]
}