# For installation and configuration options, refer to the [installation instructions](https://github.com/elastic/opentelemetry/blob/main/docs/kubernetes/operator/README.md)
# For advanced configuration options, refer to the [official OpenTelemetry Helm chart](https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-kube-stack/values.yaml)
# This file has been tested with opentelemetry-kube-stack Helm chart version 0.3.3
opentelemetry-operator:
manager:
extraArgs:
- --enable-go-instrumentation
admissionWebhooks:
certManager:
enabled: false # For production environments, it is [recommended to use cert-manager for better security and scalability](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-operator#tls-certificate-requirement).
autoGenerateCert:
enabled: true # Enable/disable automatic certificate generation. Set to false if manually managing certificates.
recreate: true # Force certificate regeneration on updates. Only applicable if autoGenerateCert.enabled is true.
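    # A hedged example of the cert-manager setup recommended above, assuming
    # cert-manager is already installed in the cluster: let it issue the webhook
    # certificate and disable the chart's self-generated one.
    # certManager:
    #   enabled: true
    # autoGenerateCert:
    #   enabled: false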
crds:
create: true # Install the OpenTelemetry Operator CRDs.
defaultCRConfig:
image:
repository: "docker.elastic.co/elastic-agent/elastic-agent"
tag: "9.1.0"
targetAllocator:
enabled: false # Enable/disable the Operator's Target allocator.
# Refer to: https://github.com/open-telemetry/opentelemetry-operator/tree/main/cmd/otel-allocator
clusterRole:
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get"]
# `clusterName` specifies the name of the Kubernetes cluster. It sets the 'k8s.cluster.name' field.
# Cluster Name is automatically detected for EKS/GKE/AKS. Uncomment and set the value below in environments where the cluster name cannot be detected.
# clusterName: myClusterName
collectors:
  # Cluster is a K8s Deployment EDOT Collector focused on gathering telemetry
  # at the cluster level (Kubernetes events and cluster metrics).
cluster:
fullnameOverride: "opentelemetry-kube-stack-cluster-stats"
env:
- name: ELASTIC_AGENT_OTEL
value: '"true"'
config:
exporters:
# [Debug exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/debugexporter/README.md)
debug:
          verbosity: basic # Options: basic, normal, detailed. Choose the verbosity level for debug logs.
        # [OTLP exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/otlpexporter/README.md)
otlp/gateway:
endpoint: "http://opentelemetry-kube-stack-gateway-collector:4317"
tls:
insecure: true
processors:
# [Resource Detection Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor)
resourcedetection/eks:
          detectors: [env, eks] # Detects resources from environment variables and EKS (Amazon Elastic Kubernetes Service).
timeout: 15s
override: true
eks:
resource_attributes:
k8s.cluster.name:
enabled: true
resourcedetection/gcp:
detectors: [env, gcp] # Detects resources from environment variables and GCP (Google Cloud Platform).
timeout: 2s
override: true
resourcedetection/aks:
detectors: [env, aks] # Detects resources from environment variables and AKS (Azure Kubernetes Service).
timeout: 2s
override: true
aks:
resource_attributes:
k8s.cluster.name:
enabled: true
# [Resource Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourceprocessor)
resource/k8s: # Resource attributes tailored for services within Kubernetes.
attributes:
- key: service.name # Set the service.name resource attribute based on the well-known app.kubernetes.io/name label
from_attribute: app.label.name
action: insert
- key: service.name # Set the service.name resource attribute based on the k8s.container.name attribute
from_attribute: k8s.container.name
action: insert
- key: app.label.name # Delete app.label.name attribute previously used for service.name
action: delete
- key: service.version # Set the service.version resource attribute based on the well-known app.kubernetes.io/version label
from_attribute: app.label.version
action: insert
- key: app.label.version # Delete app.label.version attribute previously used for service.version
action: delete
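            # Ordering matters above: `insert` only writes an attribute that is not
            # already present, so for a pod labeled app.kubernetes.io/name=checkout
            # the first rule wins and service.name becomes "checkout", while for an
            # unlabeled pod that rule is a no-op and service.name falls back to
            # k8s.container.name. (The "checkout" name is illustrative only.)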
resource/hostname:
attributes:
- key: host.name
from_attribute: k8s.node.name
action: upsert
# [K8s Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor)
k8sattributes:
          passthrough: false # When true, only annotates resources with the pod IP without extracting other metadata. Keep false for full metadata extraction.
pod_association:
            # The associations below look at the k8s.pod.ip and k8s.pod.uid resource attributes, or the connection's context, and try to match them with the pod carrying the same attribute.
- sources:
- from: resource_attribute
name: k8s.pod.ip
- sources:
- from: resource_attribute
name: k8s.pod.uid
- sources:
- from: connection
extract:
metadata:
- "k8s.namespace.name"
- "k8s.deployment.name"
- "k8s.replicaset.name"
- "k8s.statefulset.name"
- "k8s.daemonset.name"
- "k8s.cronjob.name"
- "k8s.job.name"
- "k8s.node.name"
- "k8s.pod.name"
- "k8s.pod.ip"
- "k8s.pod.uid"
- "k8s.pod.start_time"
labels:
- tag_name: app.label.name
key: app.kubernetes.io/name
from: pod
- tag_name: app.label.version
key: app.kubernetes.io/version
from: pod
receivers:
# [K8s Objects Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sobjectsreceiver)
k8sobjects:
objects:
- name: events
mode: "watch"
group: "events.k8s.io"
exclude_watch_type:
- "DELETED"
# [K8s Cluster Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/k8sclusterreceiver)
k8s_cluster:
          auth_type: serviceAccount # How to authenticate to the K8s API server: none (no auth), serviceAccount (use the standard service account token provided to the agent pod), or kubeConfig (use credentials from ~/.kube/config).
node_conditions_to_report:
- Ready
- MemoryPressure
allocatable_types_to_report:
- cpu
- memory
metrics:
k8s.pod.status_reason:
enabled: true
resource_attributes:
k8s.kubelet.version:
enabled: true
os.description:
enabled: true
os.type:
enabled: true
k8s.container.status.last_terminated_reason:
enabled: true
# [Service Section](https://opentelemetry.io/docs/collector/configuration/#service)
service:
pipelines:
metrics:
exporters:
- debug
- otlp/gateway
processors:
- k8sattributes
- resourcedetection/eks
- resourcedetection/gcp
- resourcedetection/aks
- resource/k8s
- resource/hostname
receivers:
- k8s_cluster
logs:
receivers:
- k8sobjects
processors:
- resourcedetection/eks
- resourcedetection/gcp
- resourcedetection/aks
- resource/hostname
exporters:
- debug
- otlp/gateway
  # Daemon is a K8s DaemonSet EDOT Collector focused on gathering telemetry at
  # the node level and exposing an OTLP endpoint for data ingestion.
  # Auto-instrumentation SDKs send their telemetry to this endpoint.
daemon:
fullnameOverride: "opentelemetry-kube-stack-daemon"
env:
      # Workaround for the open /mounts error: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/35990
- name: HOST_PROC_MOUNTINFO
value: ""
- name: ELASTIC_AGENT_OTEL
value: '"true"'
presets:
logsCollection:
        enabled: true # Enable/disable the collection of the node's logs.
storeCheckpoints: true # Store checkpoints for log collection, allowing for resumption from the last processed log.
hostNetwork: true # Use the host's network namespace. This allows the daemon to access the network interfaces of the host directly.
securityContext: # Run the daemon as the root user and group for proper metrics collection.
runAsUser: 0
runAsGroup: 0
scrape_configs_file: "" # [Prometheus metrics](https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-kube-stack#scrape_configs_file-details)
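    # When set, the chart loads Prometheus-style scrape configs from the given file
    # into a prometheus receiver, per the chart docs linked above. A minimal sketch
    # of such a file (job name and target are illustrative assumptions):
    # - job_name: my-app
    #   scrape_interval: 30s
    #   static_configs:
    #     - targets: ["localhost:9090"]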
config:
exporters:
# [Debug exporter](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/debugexporter/README.md)
debug:
verbosity: basic
otlp/gateway:
endpoint: "http://opentelemetry-kube-stack-gateway-collector-headless:4317"
tls:
insecure: true
processors:
# [Batch Processor](https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor)
batch: {}
batch/metrics:
# explicitly set send_batch_max_size to 0, as splitting metrics requests may cause version_conflict_engine_exception in TSDB
send_batch_max_size: 0
timeout: 1s
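          # With send_batch_max_size set to 0 there is no upper bound, so each
          # batch is flushed as a single request when the 1s timeout fires instead
          # of being split across multiple requests.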
# [Resource Detection Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor)
resourcedetection/eks:
          detectors: [env, eks] # Detects resources from environment variables and EKS (Amazon Elastic Kubernetes Service).
timeout: 15s
override: true
eks:
resource_attributes:
k8s.cluster.name:
enabled: true
resourcedetection/gcp:
detectors: [env, gcp] # Detects resources from environment variables and GCP (Google Cloud Platform).
timeout: 2s
override: true
resourcedetection/aks:
detectors: [env, aks] # Detects resources from environment variables and AKS (Azure Kubernetes Service).
timeout: 2s
override: true
aks:
resource_attributes:
k8s.cluster.name:
enabled: true
resource/hostname:
attributes:
- key: host.name
from_attribute: k8s.node.name
action: upsert
resourcedetection/system:
detectors: ["system", "ec2"] # Detects resources from the system and EC2 instances.
system:
hostname_sources: ["os"]
resource_attributes:
host.name:
enabled: true
host.id:
enabled: false
host.arch:
enabled: true
host.ip:
enabled: true
host.mac:
enabled: true
host.cpu.vendor.id:
enabled: true
host.cpu.family:
enabled: true
host.cpu.model.id:
enabled: true
host.cpu.model.name:
enabled: true
host.cpu.stepping:
enabled: true
host.cpu.cache.l2.size:
enabled: true
os.description:
enabled: true
os.type:
enabled: true
ec2:
resource_attributes:
host.name:
enabled: false
host.id:
enabled: true
# [Resource Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourceprocessor)
resource/k8s: # Resource attributes tailored for services within Kubernetes.
attributes:
- key: service.name # Set the service.name resource attribute based on the well-known app.kubernetes.io/name label
from_attribute: app.label.name
action: insert
- key: service.name # Set the service.name resource attribute based on the k8s.container.name attribute
from_attribute: k8s.container.name
action: insert
- key: app.label.name # Delete app.label.name attribute previously used for service.name
action: delete
- key: service.version # Set the service.version resource attribute based on the well-known app.kubernetes.io/version label
from_attribute: app.label.version
action: insert
- key: app.label.version # Delete app.label.version attribute previously used for service.version
action: delete
resource/cloud:
attributes:
- key: cloud.instance.id
from_attribute: host.id
action: insert
# [K8s Attributes Processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor)
k8sattributes:
filter:
# Only retrieve pods running on the same node as the collector
node_from_env_var: OTEL_K8S_NODE_NAME
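            # OTEL_K8S_NODE_NAME is expected to be injected by the chart via the
            # downward API (spec.nodeName), limiting the metadata cache to pods
            # scheduled on this node.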
passthrough: false
pod_association:
            # The associations below look at the k8s.pod.ip and k8s.pod.uid resource attributes, or the connection's context, and try to match them with the pod carrying the same attribute.
- sources:
- from: resource_attribute
name: k8s.pod.ip
- sources:
- from: resource_attribute
name: k8s.pod.uid
- sources:
- from: connection
extract:
metadata:
- "k8s.namespace.name"
- "k8s.deployment.name"
- "k8s.replicaset.name"
- "k8s.statefulset.name"
- "k8s.daemonset.name"
- "k8s.cronjob.name"
- "k8s.job.name"
- "k8s.node.name"
- "k8s.pod.name"
- "k8s.pod.ip"
- "k8s.pod.uid"
- "k8s.pod.start_time"
labels:
- tag_name: app.label.name
key: app.kubernetes.io/name
from: pod
- tag_name: app.label.version
key: app.kubernetes.io/version
from: pod
receivers:
# [OTLP Receiver](https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver)
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
# [File Log Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver)
filelog:
retry_on_failure:
enabled: true
start_at: end
exclude:
            # exclude the collector's own logs
- /var/log/pods/*opentelemetry-kube-stack*/*/*.log
include:
- /var/log/pods/*/*/*.log
include_file_name: false
include_file_path: true
operators:
            - id: container-parser # Extract container metadata
type: container
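              # The container operator auto-detects the runtime log format (Docker
              # JSON, containerd, CRI-O) and derives k8s.pod.name,
              # k8s.namespace.name and k8s.container.name from the log file path.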
# [Hostmetrics Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver)
hostmetrics:
collection_interval: 10s
          root_path: /hostfs # The node's root filesystem, mounted into the collector container.
scrapers:
cpu:
metrics:
system.cpu.utilization:
enabled: true
system.cpu.logical.count:
enabled: true
memory:
metrics:
system.memory.utilization:
enabled: true
# process scraper is disabled for now: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/39423
#process:
# mute_process_exe_error: true
# mute_process_io_error: true
# mute_process_user_error: true
# metrics:
# process.threads:
# enabled: true
# process.open_file_descriptors:
# enabled: true
# process.memory.utilization:
# enabled: true
# process.disk.operations:
# enabled: true
network: {}
processes: {}
load: {}
disk: {}
filesystem:
exclude_mount_points:
mount_points:
- /dev/*
- /proc/*
- /sys/*
- /run/k3s/containerd/*
- /var/lib/docker/*
- /var/lib/kubelet/*
- /snap/*
match_type: regexp
exclude_fs_types:
fs_types:
- autofs
- binfmt_misc
- bpf
- cgroup2
- configfs
- debugfs
- devpts
- devtmpfs
- fusectl
- hugetlbfs
- iso9660
- mqueue
- nsfs
- overlay
- proc
- procfs
- pstore
- rpc_pipefs
- securityfs
- selinuxfs
- squashfs
- sysfs
- tracefs
match_type: strict
# [Kubelet Stats Receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver)
kubeletstats:
          auth_type: serviceAccount # Authentication mechanism for the Kubelet endpoint; refer to: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver#configuration
collection_interval: 20s
endpoint: ${env:OTEL_K8S_NODE_NAME}:10250
node: "${env:OTEL_K8S_NODE_NAME}"
          # Required for compatibility across all cloud service providers (CSPs).
insecure_skip_verify: true
k8s_api_config:
auth_type: serviceAccount
metrics:
k8s.pod.memory.node.utilization:
enabled: true
k8s.pod.cpu.node.utilization:
enabled: true
k8s.container.cpu_limit_utilization:
enabled: true
k8s.pod.cpu_limit_utilization:
enabled: true
k8s.container.cpu_request_utilization:
enabled: true
k8s.container.memory_limit_utilization:
enabled: true
k8s.pod.memory_limit_utilization:
enabled: true
k8s.container.memory_request_utilization:
enabled: true
k8s.node.uptime:
enabled: true
k8s.node.cpu.usage:
enabled: true
k8s.pod.cpu.usage:
enabled: true
extra_metadata_labels:
- container.id
# [Service Section](https://opentelemetry.io/docs/collector/configuration/#service)
service:
pipelines:
logs/node:
receivers:
- filelog
processors:
- batch
- k8sattributes
- resourcedetection/system
- resourcedetection/eks
- resourcedetection/gcp
- resourcedetection/aks
- resource/k8s
- resource/hostname
- resource/cloud
exporters:
- otlp/gateway
metrics/node/otel:
receivers:
- kubeletstats
- hostmetrics
processors:
- batch/metrics
- k8sattributes
- resourcedetection/system
- resourcedetection/eks
- resourcedetection/gcp
- resourcedetection/aks
- resource/k8s
- resource/hostname
- resource/cloud
exporters:
# - debug
- otlp/gateway
metrics/otel-apm:
receivers:
- otlp
processors:
- batch/metrics
- resource/hostname
exporters:
- otlp/gateway
logs/apm:
receivers:
- otlp
processors:
- batch
- resource/hostname
exporters:
- otlp/gateway
traces/apm:
receivers:
- otlp
processors:
- batch
- resource/hostname
exporters:
- otlp/gateway
  # Gateway is a K8s Deployment EDOT Collector focused on processing and
  # forwarding telemetry to the Elastic Managed OTLP endpoint.
gateway:
fullnameOverride: "opentelemetry-kube-stack-gateway"
suffix: gateway
replicas: 2
autoscaler:
minReplicas: 2 # Start with at least 2 replicas for better availability.
maxReplicas: 5 # Allow more scale-out if needed.
targetCPUUtilization: 70 # Scale when CPU usage exceeds 70%.
targetMemoryUtilization: 75 # Scale when memory usage exceeds 75%.
resources:
limits:
cpu: 500m
memory: 1000Mi
requests:
cpu: 100m
memory: 500Mi
enabled: true
env:
- name: ELASTIC_AGENT_OTEL
value: '"true"'
- name: ELASTIC_OTLP_ENDPOINT
valueFrom:
secretKeyRef:
name: elastic-secret-otel
key: elastic_otlp_endpoint
- name: ELASTIC_API_KEY
valueFrom:
secretKeyRef:
name: elastic-secret-otel
key: elastic_api_key
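    # The secret referenced above must exist before the chart is installed. A
    # sketch of creating it (placeholder values; adjust the namespace to wherever
    # the chart is deployed):
    # kubectl create secret generic elastic-secret-otel \
    #   --namespace opentelemetry-operator-system \
    #   --from-literal=elastic_otlp_endpoint='<elastic-otlp-endpoint>' \
    #   --from-literal=elastic_api_key='<elastic-api-key>'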
config:
receivers:
otlp:
protocols:
grpc:
endpoint: ${env:MY_POD_IP}:4317
http:
endpoint: ${env:MY_POD_IP}:4318
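        # MY_POD_IP is expected to be injected by the chart via the downward API
        # (status.podIP); binding to the pod IP rather than 0.0.0.0 keeps the
        # listeners off the node's other interfaces.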
processors:
batch:
send_batch_size: 1000
timeout: 1s
send_batch_max_size: 1500
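          # send_batch_size triggers a flush once 1000 items accumulate, while
          # send_batch_max_size splits any batch larger than 1500 items into
          # separate requests.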
batch/metrics:
# explicitly set send_batch_max_size to 0, as splitting metrics requests may cause version_conflict_engine_exception in TSDB
send_batch_max_size: 0
timeout: 1s
exporters:
debug:
otlp/ingest:
endpoint: ${env:ELASTIC_OTLP_ENDPOINT}
headers:
Authorization: ApiKey ${env:ELASTIC_API_KEY}
service:
pipelines:
metrics:
receivers: [otlp]
processors: [batch/metrics]
exporters: [debug, otlp/ingest]
logs:
receivers: [otlp]
processors: [batch]
exporters: [debug, otlp/ingest]
traces:
receivers: [otlp]
processors: [batch]
exporters: [debug, otlp/ingest]
# For more details on OpenTelemetry's zero-code instrumentation, see:
# https://opentelemetry.io/docs/concepts/instrumentation/zero-code/
instrumentation:
name: elastic-instrumentation
enabled: true # Enable/disable auto-instrumentation.
exporter:
endpoint: http://opentelemetry-kube-stack-daemon-collector.opentelemetry-operator-system.svc.cluster.local:4318 # The daemonset OpenTelemetry Collector endpoint where telemetry data will be exported.
propagators:
- tracecontext # W3C TraceContext propagator for distributed tracing.
- baggage # Baggage propagator to include baggage information in trace context.
- b3 # B3 propagator for Zipkin-based distributed tracing compatibility.
sampler:
    type: parentbased_traceidratio # Parent-based sampler: honors the parent span's sampling decision; root spans are sampled at the trace ID ratio below.
argument: "1.0" # Sampling rate set to 100% (all traces are sampled).
java:
image: docker.elastic.co/observability/elastic-otel-javaagent:1.3.0
nodejs:
image: docker.elastic.co/observability/elastic-otel-node:1.0.0
dotnet:
image: docker.elastic.co/observability/elastic-otel-dotnet:1.0.1
python:
image: docker.elastic.co/observability/elastic-otel-python:1.0.0
go:
image: ghcr.io/open-telemetry/opentelemetry-go-instrumentation/autoinstrumentation-go:v0.21.0
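# To opt a workload into zero-code instrumentation, annotate its pod template; a
# sketch for a Java service (use "<namespace>/elastic-instrumentation" if the
# Instrumentation resource lives in a different namespace than the workload):
#   metadata:
#     annotations:
#       instrumentation.opentelemetry.io/inject-java: "elastic-instrumentation"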