oap-server/server-starter/src/main/resources/otel-rules/flink/flink-jobManager.yaml (42 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Duration strings (for example the 'PT1M' passed to increase() in the rules
# below) are parsed from a textual representation. The accepted formats are
# based on the ISO-8601 duration format PnDTnHnMn.nS, with days considered to
# be exactly 24 hours.
#
# Examples:
#   "PT20.345S" -- parses as "20.345 seconds"
#   "PT15M"     -- parses as "15 minutes" (where a minute is 60 seconds)
#   "PT10H"     -- parses as "10 hours" (where an hour is 3600 seconds)
#   "P2D"       -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
#   "P2DT3H4M"  -- parses as "2 days, 3 hours and 4 minutes"
#   "PT-6H3M"   -- parses as "-6 hours and +3 minutes"
#   "-PT6H3M"   -- parses as "-6 hours and -3 minutes"
#   "-PT-6H+3M" -- parses as "+6 hours and -3 minutes"

# Only samples whose job_name tag equals the OpenTelemetry job name
# 'flink-jobManager-monitoring' pass this filter.
filter: "{ tags -> tags.job_name == 'flink-jobManager-monitoring' }"

# Appended to every rule expression: prefixes the cluster tag with 'flink::'
# and calls service(['cluster'], Layer.FLINK).
expSuffix: "tag({tags -> tags.cluster = 'flink::' + tags.cluster}).service(['cluster'], Layer.FLINK)"

# Presumably combined with each rule name below to form the final metric
# name -- verify against the rule loader.
metricPrefix: "meter_flink_jobManager"

metricsRules:

  # ---- Jobs (summed per cluster) ----
  - name: "running_job_number"
    exp: "flink_jobmanager_numRunningJobs.sum(['cluster'])"

  # ---- Task managers and task slots (summed per cluster) ----
  - name: "taskManagers_registered_number"
    exp: "flink_jobmanager_numRegisteredTaskManagers.sum(['cluster'])"
  - name: "taskManagers_slots_total"
    exp: "flink_jobmanager_taskSlotsTotal.sum(['cluster'])"
  - name: "taskManagers_slots_available"
    exp: "flink_jobmanager_taskSlotsAvailable.sum(['cluster'])"

  # ---- JVM: CPU (summed per cluster and jobManager_node) ----
  # NOTE(review): the source metric is multiplied by 1000 here; the reason is
  # not visible in this file -- presumably the consumer rescales it. Confirm
  # before changing the factor.
  - name: "jvm_cpu_load"
    exp: "flink_jobmanager_Status_JVM_CPU_Load.sum(['cluster','jobManager_node'])*1000"
  # increase('PT1M') is applied with a one-minute duration.
  - name: "jvm_cpu_time"
    exp: "flink_jobmanager_Status_JVM_CPU_Time.sum(['cluster','jobManager_node']).increase('PT1M')"

  # ---- JVM: memory and threads ----
  # Each "*_available" rule is computed as the corresponding Max minus Used.
  - name: "jvm_memory_heap_used"
    exp: "flink_jobmanager_Status_JVM_Memory_Heap_Used.sum(['cluster','jobManager_node'])"
  - name: "jvm_memory_heap_available"
    exp: "flink_jobmanager_Status_JVM_Memory_Heap_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_Heap_Used.sum(['cluster','jobManager_node'])"
  - name: "jvm_memory_nonHeap_used"
    exp: "flink_jobmanager_Status_JVM_Memory_NonHeap_Used.sum(['cluster','jobManager_node'])"
  - name: "jvm_memory_nonHeap_available"
    exp: "flink_jobmanager_Status_JVM_Memory_NonHeap_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_NonHeap_Used.sum(['cluster','jobManager_node'])"
  - name: "jvm_thread_count"
    exp: "flink_jobmanager_Status_JVM_Threads_Count.sum(['cluster','jobManager_node'])"
  - name: "jvm_memory_metaspace_used"
    exp: "flink_jobmanager_Status_JVM_Memory_Metaspace_Used.sum(['cluster','jobManager_node'])"
  - name: "jvm_memory_metaspace_available"
    exp: "flink_jobmanager_Status_JVM_Memory_Metaspace_Max.sum(['cluster','jobManager_node'])-flink_jobmanager_Status_JVM_Memory_Metaspace_Used.sum(['cluster','jobManager_node'])"

  # ---- JVM: garbage collection ----
  # Every rule in this group applies increase('PT1M') to the summed counter.
  - name: "jvm_g1_young_generation_count"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: "jvm_g1_old_generation_count"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: "jvm_g1_young_generation_time"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: "jvm_g1_old_generation_time"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: "jvm_all_garbageCollector_count"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_All_Count.sum(['cluster','jobManager_node']).increase('PT1M')"
  - name: "jvm_all_garbageCollector_time"
    exp: "flink_jobmanager_Status_JVM_GarbageCollector_All_Time.sum(['cluster','jobManager_node']).increase('PT1M')"