# integration_test/third_party_apps_test/applications/hadoop/metadata.yaml (117 lines of code) (raw):
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Identity of the integration: the URL of its public documentation page, the
# upstream project's site, its short and long names, and the path to its logo.
public_url: "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/third-party/hadoop"
app_url: "https://hadoop.apache.org/"
short_name: Hadoop
long_name: Apache Hadoop
logo_path: /stackdriver/images/hadoop.png # supplied by google technical writer
# Summary of what the integration collects (name-node storage metrics and
# parsed Hadoop logs). Literal block scalar: line breaks are kept and the
# trailing newline is stripped by `|-`.
description: |-
  The Apache Hadoop integration collects name-node metrics related to storage,
  such as capacity utilization, file accesses, and blocks. The integration also
  collects Hadoop logs and parses them into a JSON payload. The result includes
  fields for source, level, and message.
# Setup instructions for exposing the JMX endpoint that the metrics are read
# from. The text carries Markdown backticks and an HTML <pre> block, so it is
# kept as a literal block scalar (`|-`: line breaks kept, trailing newline
# stripped). The example port 8004 matches the default metrics `endpoint`
# (localhost:8004) listed under configuration_options.
configure_integration: |-
  To expose a JMX endpoint, you must set the `com.sun.management.jmxremote.port`
  system property when starting the JVM. We also recommend setting the
  `com.sun.management.jmxremote.rmi.port` system property to the same port. To
  expose a JMX endpoint remotely, you must also set the `java.rmi.server.hostname`
  system property.
  By default, these properties are set in a Hadoop deployment's `hadoop-env.sh`
  file.
  To set system properties by using command-line arguments, prepend the property
  name with `-D` when starting the JVM. For example, to set
  `com.sun.management.jmxremote.port` to port `8004`, specify the following when
  starting the JVM:
  <pre>
  -Dcom.sun.management.jmxremote.port=8004
  </pre>
# Minimum agent version that supports this integration, given separately for
# metrics and for logging. The versions are quoted so they are always read as
# strings, consistent with the quoted entries in supported_app_version below.
minimum_supported_agent_version:
  metrics: "2.11.0"
  logging: "2.11.0"
# Operating systems the integration supports.
supported_operating_systems: linux
# Supported application versions; `x` stands in for the last version component.
supported_app_version: ["2.10.x", "3.2.x", "3.3.x"]
# Metrics expected from the integration. Every entry below is an INT64 GAUGE
# on the gce_instance monitored resource and carries a `node_name` label.
# A `value_regex` of `.*` accepts any label value.
expected_metrics:
  - type: workload.googleapis.com/hadoop.name_node.block.corrupt
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.block.count
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.block.missing
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.capacity.limit
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.capacity.usage
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
    # The only entry flagged as representative.
    # NOTE(review): presumably marks the metric used to represent the whole
    # integration; confirm against the consumer of this file. Its nesting
    # (metric level rather than label level) is also assumed — TODO confirm.
    representative: true
  # The only entry with a second label (`state`) in addition to `node_name`.
  - type: workload.googleapis.com/hadoop.name_node.data_node.count
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
      - name: state
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.file.load
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
  - type: workload.googleapis.com/hadoop.name_node.volume.failed
    value_type: INT64
    kind: GAUGE
    monitored_resources: [gce_instance]
    labels:
      - name: node_name
        value_regex: .*
# Log entries expected from the integration, grouped by log name. Each field
# lists its name, type, and description; `value_regex`, where present, is the
# pattern the field's value is expected to match.
expected_logs:
  - log_name: hadoop
    fields:
      # The (?s) part makes `.` match newlines as well. See
      # https://github.com/google/re2/blob/main/doc/syntax.txt#L65,L68
      # The pattern is single-quoted so it can never be confused with a
      # trailing YAML comment or other YAML syntax.
      - name: jsonPayload.message
        value_regex: '(?s)STARTUP_MSG:.*'
        type: string
        description: 'Log message'
      # Dots are escaped so they match only the literal `.` separators of the
      # class name rather than any character.
      - name: jsonPayload.source
        value_regex: 'org\.apache\.hadoop\.hdfs\.server\.namenode\.NameNode'
        type: string
        description: 'The source Java class of the log entry'
      # NOTE(review): `optional: true` presumably means this field may be
      # absent from a log entry; confirm against the consumer of this file.
      - name: jsonPayload.severity
        type: string
        description: Log entry level
        optional: true
      - name: severity
        type: string
        description: ''
# Configuration fields for the `hadoop` receiver type, listed separately for
# logs and for metrics. Each field gives its name, its default value (`null`
# where none is given), and a Markdown description.
configuration_options:
  logs:
    - type: hadoop
      fields:
        - name: type
          default: null
          description: This value must be `hadoop`.
        # The default is written as a quoted string that spells out a list of
        # two glob patterns, not as a YAML sequence.
        - name: include_paths
          default: '[/opt/hadoop/logs/hadoop-*.log, /opt/hadoop/logs/yarn-*.log]'
          description: A list of filesystem paths to read by tailing each file. A wild card (`*`) can be used in the paths.
        - name: exclude_paths
          default: null
          description: A list of filesystem path patterns to exclude from the set matched by `include_paths`.
        - name: record_log_file_path
          default: false
          description: If set to `true`, then the path to the specific file from which the log record was obtained appears in the output log entry as the value of the `agent.googleapis.com/log_file_path` label. When using a wildcard, only the path of the file from which the record was obtained is recorded.
        - name: wildcard_refresh_interval
          default: 60s
          description: The interval at which wildcard file paths in `include_paths` are refreshed. Given as a [time duration](https://pkg.go.dev/time#ParseDuration), for example `30s` or `2m`. This property might be useful under high logging throughputs where log files are rotated faster than the default interval.
  metrics:
    - type: hadoop
      fields:
        - name: type
          default: null
          description: This value must be `hadoop`.
        # The default port 8004 is the same port used in the example in
        # configure_integration.
        - name: endpoint
          default: localhost:8004
          description: The [JMX Service URL](https://docs.oracle.com/javase/8/docs/api/javax/management/remote/JMXServiceURL.html) or host and port used to construct the service URL. This value must be in the form of `service:jmx:<protocol>:<sap>` or `host:port`. Values in `host:port` form are used to create a service URL of `service:jmx:rmi:///jndi/rmi://<host>:<port>/jmxrmi`.
        - name: collect_jvm_metrics
          default: true
          description: Configures the receiver to also collect the supported JVM metrics.
        # Credentials default to null; per the descriptions they apply only
        # when JMX is configured to require authentication.
        - name: username
          default: null
          description: The configured username if JMX is configured to require authentication.
        - name: password
          default: null
          description: The configured password if JMX is configured to require authentication.
        - name: collection_interval
          default: 60s
          description: A [time duration](https://pkg.go.dev/time#ParseDuration) value, such as `30s` or `5m`.