oap-server/server-starter/src/main/resources/otel-rules/flink/flink-jobManager.yaml (42 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Duration arguments such as the 'PT1M' passed to increase() in the rules
# below are given as text. The accepted formats are based on the ISO-8601
# duration format PnDTnHnMn.nS, with days considered to be exactly 24 hours.
#
# Examples:
#   "PT20.345S" -- parses as "20.345 seconds"
#   "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
#   "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
#   "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
#   "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
#   "PT-6H3M"   -- parses as "-6 hours and +3 minutes"
#   "-PT6H3M"   -- parses as "-6 hours and -3 minutes"
#   "-PT-6H+3M" -- parses as "+6 hours and -3 minutes"
# Closure over a sample's tags: true when job_name equals the OpenTelemetry
# job name 'flink-jobManager-monitoring'.
filter: "{ tags -> tags.job_name == 'flink-jobManager-monitoring' }"
# Rewrites the cluster tag to 'flink::' + <cluster>, then calls
# service(['cluster'], Layer.FLINK).
# NOTE(review): presumably appended to every exp under metricsRules - confirm.
expSuffix: "tag({tags -> tags.cluster = 'flink::' + tags.cluster}).service(['cluster'], Layer.FLINK)"
# NOTE(review): presumably prepended to every rule name under metricsRules - confirm.
metricPrefix: meter_flink_jobManager
# One entry per derived metric: `name` identifies the metric, `exp` is the
# expression evaluated over the incoming flink_jobmanager_* samples.
metricsRules:

  # job
  # Running-job gauge, summed per cluster.
  - name: running_job_number
    exp: "flink_jobmanager_numRunningJobs.sum(['cluster'])"

  # task
  # Registered task managers, summed per cluster.
  - name: taskManagers_registered_number
    exp: "flink_jobmanager_numRegisteredTaskManagers.sum(['cluster'])"
  # Total task slots, summed per cluster.
  - name: taskManagers_slots_total
    exp: "flink_jobmanager_taskSlotsTotal.sum(['cluster'])"
  # Available task slots, summed per cluster.
  - name: taskManagers_slots_available
    exp: "flink_jobmanager_taskSlotsAvailable.sum(['cluster'])"

  # jvm
  # Every rule in this group sums by cluster and jobManager_node.
  # Rules ending in increase('PT1M') pass the ISO-8601 duration PT1M
  # (one minute) as the range argument.

  # JVM CPU load, multiplied by 1000.
  # NOTE(review): the x1000 scale presumably matches what consumers of this
  # metric expect - confirm before changing.
  - name: jvm_cpu_load
    exp: "flink_jobmanager_Status_JVM_CPU_Load.sum(['cluster','jobManager_node'])*1000"
  - name: jvm_cpu_time
    exp: "flink_jobmanager_Status_JVM_CPU_Time.sum(['cluster','jobManager_node']).increase('PT1M')"

  # Heap: used, and available computed as Max minus Used.
  - name: jvm_memory_heap_used
    exp: "flink_jobmanager_Status_JVM_Memory_Heap_Used.sum(['cluster','jobManager_node'])"
  - name: jvm_memory_heap_available
    exp: "flink_jobmanager_Status_JVM_Memory_Heap_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_Heap_Used.sum(['cluster','jobManager_node'])"

  # Non-heap: used, and available computed as Max minus Used.
  - name: jvm_memory_nonHeap_used
    exp: "flink_jobmanager_Status_JVM_Memory_NonHeap_Used.sum(['cluster','jobManager_node'])"
  - name: jvm_memory_nonHeap_available
    exp: "flink_jobmanager_Status_JVM_Memory_NonHeap_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_NonHeap_Used.sum(['cluster','jobManager_node'])"

  # Thread count.
  - name: jvm_thread_count
    exp: "flink_jobmanager_Status_JVM_Threads_Count.sum(['cluster','jobManager_node'])"

  # Metaspace: used, and available computed as Max minus Used.
  - name: jvm_memory_metaspace_used
    exp: "flink_jobmanager_Status_JVM_Memory_Metaspace_Used.sum(['cluster','jobManager_node'])"
  - name: jvm_memory_metaspace_available
    exp: "flink_jobmanager_Status_JVM_Memory_Metaspace_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_Metaspace_Used.sum(['cluster','jobManager_node'])"

  # G1 garbage collector: collection counts, young and old generation.
  - name: jvm_g1_young_generation_count
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: jvm_g1_old_generation_count
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count.sum(['cluster','jobManager_node']).increase('PT1M')"

  # G1 garbage collector: collection times, young and old generation.
  - name: jvm_g1_young_generation_time
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: jvm_g1_old_generation_time
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time.sum(['cluster','jobManager_node']).increase('PT1M')"

  # All garbage collectors combined: count and time.
  - name: jvm_all_garbageCollector_count
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_All_Count.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: jvm_all_garbageCollector_time
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_All_Time.sum(['cluster','jobManager_node']).increase('PT1M')"