templates/agent-conf.d/kafka.yaml.erb (414 lines of code) (raw):
#
# MANAGED BY PUPPET
#
##########
# WARNING
##########
# This sample works only for Kafka >= 0.8.2.
# If you are running an older version, refer to the sample file released with
# Agent 5.2.x: https://raw.githubusercontent.com/DataDog/dd-agent/5.2.1/conf.d/kafka.yaml.example
instances:
<%# One list entry is rendered per element of @servers. 'host' and 'port' are always emitted; every other key is emitted only when the corresponding server value is non-nil. -%>
<%# yaml_quote wraps a value in a YAML single-quoted scalar (embedded ' doubled) so that passwords, regexes and paths that look like numbers or booleans, or that contain YAML indicator characters, still load as the literal string. -%>
<%- yaml_quote = lambda { |value| "'" + value.to_s.gsub("'", "''") + "'" } -%>
<% @servers.each do |server| -%>
  - host: <%= server['host'] %>
    port: <%= server['port'] %>
    <%- if !server['tags'].nil? && server['tags'].any? -%>
    tags:
      <%- server['tags'].each do |tag| -%>
      - <%= tag %>
      <%- end -%>
    <%- end -%>
    <%- if !server['username'].nil? -%>
    user: <%= yaml_quote.call(server['username']) %>
    <%- end -%>
    <%- if !server['password'].nil? -%>
    password: <%= yaml_quote.call(server['password']) %>
    <%- end -%>
    <%- if !server['process_name_regex'].nil? -%>
    process_name_regex: <%= yaml_quote.call(server['process_name_regex']) %>
    <%- end -%>
    <%- if !server['tools_jar_path'].nil? -%>
    tools_jar_path: <%= yaml_quote.call(server['tools_jar_path']) %>
    <%- end -%>
    <%- if !server['name'].nil? -%>
    name: <%= yaml_quote.call(server['name']) %>
    <%- end -%>
    <%- if !server['java_bin_path'].nil? -%>
    java_bin_path: <%= yaml_quote.call(server['java_bin_path']) %>
    <%- end -%>
    <%- if !server['trust_store_path'].nil? -%>
    trust_store_path: <%= yaml_quote.call(server['trust_store_path']) %>
    <%- end -%>
    <%- if !server['trust_store_password'].nil? -%>
    trust_store_password: <%= yaml_quote.call(server['trust_store_password']) %>
    <%- end -%>
<% end -%>
init_config:
  is_jmx: true

  # JMX beans and attributes gathered by this check.
  # Normally nothing below needs to be changed.
  conf:

    # ------------------------------------------------------------------
    # Producer metrics (only v0.8.2.x)
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=ProducerRequestMetrics,name=ProducerRequestRateAndTimeMs,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.producer.request_rate
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=ProducerRequestMetrics,name=ProducerRequestRateAndTimeMs,clientId=.*'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.producer.request_latency_avg
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=ProducerTopicMetrics,name=BytesPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.producer.bytes_out
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=ProducerTopicMetrics,name=MessagesPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.producer.message_rate

    # ------------------------------------------------------------------
    # Producer metrics (v0.9.0.x to v0.10.2.x)
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=producer-metrics,client-id=.*'
        attribute:
          response-rate:
            metric_type: gauge
            alias: kafka.producer.response_rate
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=producer-metrics,client-id=.*'
        attribute:
          request-rate:
            metric_type: gauge
            alias: kafka.producer.request_rate
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=producer-metrics,client-id=.*'
        attribute:
          request-latency-avg:
            metric_type: gauge
            alias: kafka.producer.request_latency_avg
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=producer-metrics,client-id=.*'
        attribute:
          outgoing-byte-rate:
            metric_type: gauge
            alias: kafka.producer.bytes_out
    - include:
        domain: 'kafka.producer'
        bean_regex: 'kafka\.producer:type=producer-metrics,client-id=.*'
        attribute:
          io-wait-time-ns-avg:
            metric_type: gauge
            alias: kafka.producer.io_wait

    # ------------------------------------------------------------------
    # Consumer metrics (only v0.8.2.x)
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ConsumerFetcherManager,name=MaxLag,clientId=.*'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.consumer.max_lag
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ConsumerFetcherManager,name=MinFetchRate,clientId=.*'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.consumer.fetch_rate
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.consumer.bytes_in
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ConsumerTopicMetrics,name=MessagesPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.consumer.messages_in
    - include:
        # Commit rate for offsets stored in ZooKeeper
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ZookeeperConsumerConnector,name=ZooKeeperCommitsPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.consumer.zookeeper_commits
    - include:
        # Commit rate for offsets stored in Kafka
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=ZookeeperConsumerConnector,name=KafkaCommitsPerSec,clientId=.*'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.consumer.kafka_commits

    # ------------------------------------------------------------------
    # Consumer metrics (v0.9.0.x to v0.10.2.x)
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=consumer-fetch-manager-metrics,client-id=.*'
        attribute:
          bytes-consumed-rate:
            metric_type: gauge
            alias: kafka.consumer.bytes_in
    - include:
        domain: 'kafka.consumer'
        bean_regex: 'kafka\.consumer:type=consumer-fetch-manager-metrics,client-id=.*'
        attribute:
          records-consumed-rate:
            metric_type: gauge
            alias: kafka.consumer.messages_in

    # ------------------------------------------------------------------
    # Aggregate cluster stats
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.net.bytes_out.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.net.bytes_in.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.messages_in.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=BytesRejectedPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.net.bytes_rejected.rate

    # ------------------------------------------------------------------
    # Request timings
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.request.fetch.failed.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.request.produce.failed.rate
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=Produce'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.request.produce.rate
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.produce.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.produce.time.99percentile
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchConsumer'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.request.fetch_consumer.rate
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchFollower'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.request.fetch_follower.rate
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.fetch_consumer.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.fetch_consumer.time.99percentile
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.fetch_follower.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.fetch_follower.time.99percentile
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=UpdateMetadata'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.update_metadata.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.update_metadata.time.99percentile
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Metadata'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.metadata.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.metadata.time.99percentile
    - include:
        domain: 'kafka.network'
        bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Offsets'
        attribute:
          Mean:
            metric_type: gauge
            alias: kafka.request.offsets.time.avg
          99thPercentile:
            metric_type: gauge
            alias: kafka.request.offsets.time.99percentile
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=KafkaRequestHandlerPool,name=RequestHandlerAvgIdlePercent'
        attribute:
          OneMinuteRate:
            metric_type: gauge
            alias: kafka.request.handler.avg.idle.pct.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ProducerRequestPurgatory,name=PurgatorySize'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.request.producer_request_purgatory.size
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=FetchRequestPurgatory,name=PurgatorySize'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.request.fetch_request_purgatory.size

    # ------------------------------------------------------------------
    # Replication stats
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.under_replicated_partitions
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaManager,name=IsrShrinksPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.replication.isr_shrinks.rate
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaManager,name=IsrExpandsPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.replication.isr_expands.rate
    - include:
        domain: 'kafka.controller'
        bean: 'kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.replication.leader_elections.rate
    - include:
        domain: 'kafka.controller'
        bean: 'kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.replication.unclean_leader_elections.rate
    - include:
        domain: 'kafka.controller'
        bean: 'kafka.controller:type=KafkaController,name=OfflinePartitionsCount'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.offline_partitions_count
    - include:
        domain: 'kafka.controller'
        bean: 'kafka.controller:type=KafkaController,name=ActiveControllerCount'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.active_controller_count
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaManager,name=PartitionCount'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.partition_count
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaManager,name=LeaderCount'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.leader_count
    - include:
        domain: 'kafka.server'
        bean: 'kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica'
        attribute:
          Value:
            metric_type: gauge
            alias: kafka.replication.max_lag

    # ------------------------------------------------------------------
    # Log flush stats
    # ------------------------------------------------------------------
    - include:
        domain: 'kafka.log'
        bean: 'kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs'
        attribute:
          Count:
            metric_type: rate
            alias: kafka.log.flush_rate.rate