integrations/nvidia-triton/prometheus_metadata.yaml (116 lines of code) (raw):

# Prometheus metadata for the NVIDIA Triton exporter integration.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm it against the consumer's schema before merging.

# Platforms this integration targets, each with the metric listed as its
# characteristic metric.
platforms:
  - type: GKE
    detections:
      - characteristic_metric:
          metric_type: prometheus.googleapis.com/nv_inference_count/counter

launch_stage: GA

exporter_metadata:
  name: NVIDIA Triton
  doc_url: https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/user_guide/metrics.html
  # Quoted so the value stays the string "24.08". Unquoted, YAML resolves it
  # to a float, and a later version such as 24.10 would collapse to 24.1.
  minimum_supported_version: "24.08"

# Every entry follows the same naming pattern:
#   name = prometheus.googleapis.com/<prometheus_name>/<suffix>
# where <suffix> is "counter" for kind CUMULATIVE and "gauge" for kind GAUGE.
default_metrics:
  # nv_cpu_* metrics
  - name: prometheus.googleapis.com/nv_cpu_memory_total_bytes/gauge
    prometheus_name: nv_cpu_memory_total_bytes
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_cpu_memory_used_bytes/gauge
    prometheus_name: nv_cpu_memory_used_bytes
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_cpu_utilization/gauge
    prometheus_name: nv_cpu_utilization
    kind: GAUGE
    value_type: DOUBLE

  # nv_energy_consumption and nv_gpu_* metrics
  - name: prometheus.googleapis.com/nv_energy_consumption/counter
    prometheus_name: nv_energy_consumption
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_gpu_memory_total_bytes/gauge
    prometheus_name: nv_gpu_memory_total_bytes
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_gpu_memory_used_bytes/gauge
    prometheus_name: nv_gpu_memory_used_bytes
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_gpu_power_limit/gauge
    prometheus_name: nv_gpu_power_limit
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_gpu_power_usage/gauge
    prometheus_name: nv_gpu_power_usage
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_gpu_utilization/gauge
    prometheus_name: nv_gpu_utilization
    kind: GAUGE
    value_type: DOUBLE

  # nv_inference_* metrics
  - name: prometheus.googleapis.com/nv_inference_compute_infer_duration_us/counter
    prometheus_name: nv_inference_compute_infer_duration_us
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_compute_input_duration_us/counter
    prometheus_name: nv_inference_compute_input_duration_us
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_compute_output_duration_us/counter
    prometheus_name: nv_inference_compute_output_duration_us
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_count/counter
    prometheus_name: nv_inference_count
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_exec_count/counter
    prometheus_name: nv_inference_exec_count
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_pending_request_count/gauge
    prometheus_name: nv_inference_pending_request_count
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_queue_duration_us/counter
    prometheus_name: nv_inference_queue_duration_us
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_request_duration_us/counter
    prometheus_name: nv_inference_request_duration_us
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_request_failure/counter
    prometheus_name: nv_inference_request_failure
    kind: CUMULATIVE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_inference_request_success/counter
    prometheus_name: nv_inference_request_success
    kind: CUMULATIVE
    value_type: DOUBLE

  # nv_pinned_memory_pool_* metrics
  - name: prometheus.googleapis.com/nv_pinned_memory_pool_total_bytes/gauge
    prometheus_name: nv_pinned_memory_pool_total_bytes
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_pinned_memory_pool_used_bytes/gauge
    prometheus_name: nv_pinned_memory_pool_used_bytes
    kind: GAUGE
    value_type: DOUBLE

  # nv_trt_llm_* metrics
  - name: prometheus.googleapis.com/nv_trt_llm_general_metrics/gauge
    prometheus_name: nv_trt_llm_general_metrics
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_trt_llm_inflight_batcher_metrics/gauge
    prometheus_name: nv_trt_llm_inflight_batcher_metrics
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_trt_llm_kv_cache_block_metrics/gauge
    prometheus_name: nv_trt_llm_kv_cache_block_metrics
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_trt_llm_request_metrics/gauge
    prometheus_name: nv_trt_llm_request_metrics
    kind: GAUGE
    value_type: DOUBLE
  - name: prometheus.googleapis.com/nv_trt_llm_runtime_memory_metrics/gauge
    prometheus_name: nv_trt_llm_runtime_memory_metrics
    kind: GAUGE
    value_type: DOUBLE

install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/nvidia-triton