# integrations/nvidia-triton/prometheus_metadata.yaml (116 lines of code) (raw)
# Prometheus exporter metadata for the NVIDIA Triton integration.
#
# Each entry under `platforms` describes one supported platform: the metric
# whose presence is used for detection, the launch stage, information about
# the exporter itself, the metrics listed by default, and the install guide.
#
# NOTE(review): the block indentation below was reconstructed from a flattened
# copy of this file. The nesting of `launch_stage`, `exporter_metadata`,
# `default_metrics` and `install_documentation_url` under the GKE platform
# entry is assumed -- confirm against the consuming schema.
platforms:
  - type: GKE
    detections:
      - characteristic_metric:
          metric_type: prometheus.googleapis.com/nv_inference_count/counter
    launch_stage: GA
    exporter_metadata:
      name: NVIDIA Triton
      doc_url: https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/user_guide/metrics.html
      # Quoted so the version stays a string; a plain 24.08 is resolved as a
      # float, and a later value such as 24.10 would silently become 24.1.
      minimum_supported_version: "24.08"
    # Entries are kept in alphabetical order by metric name. The `kind` of
    # each entry matches the suffix of its `name` (/gauge -> GAUGE,
    # /counter -> CUMULATIVE).
    default_metrics:
      - name: prometheus.googleapis.com/nv_cpu_memory_total_bytes/gauge
        prometheus_name: nv_cpu_memory_total_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_cpu_memory_used_bytes/gauge
        prometheus_name: nv_cpu_memory_used_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_cpu_utilization/gauge
        prometheus_name: nv_cpu_utilization
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_energy_consumption/counter
        prometheus_name: nv_energy_consumption
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_gpu_memory_total_bytes/gauge
        prometheus_name: nv_gpu_memory_total_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_gpu_memory_used_bytes/gauge
        prometheus_name: nv_gpu_memory_used_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_gpu_power_limit/gauge
        prometheus_name: nv_gpu_power_limit
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_gpu_power_usage/gauge
        prometheus_name: nv_gpu_power_usage
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_gpu_utilization/gauge
        prometheus_name: nv_gpu_utilization
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_compute_infer_duration_us/counter
        prometheus_name: nv_inference_compute_infer_duration_us
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_compute_input_duration_us/counter
        prometheus_name: nv_inference_compute_input_duration_us
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_compute_output_duration_us/counter
        prometheus_name: nv_inference_compute_output_duration_us
        kind: CUMULATIVE
        value_type: DOUBLE
      # Also used above as the characteristic metric for detection.
      - name: prometheus.googleapis.com/nv_inference_count/counter
        prometheus_name: nv_inference_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_exec_count/counter
        prometheus_name: nv_inference_exec_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_pending_request_count/gauge
        prometheus_name: nv_inference_pending_request_count
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_queue_duration_us/counter
        prometheus_name: nv_inference_queue_duration_us
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_request_duration_us/counter
        prometheus_name: nv_inference_request_duration_us
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_request_failure/counter
        prometheus_name: nv_inference_request_failure
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_inference_request_success/counter
        prometheus_name: nv_inference_request_success
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_pinned_memory_pool_total_bytes/gauge
        prometheus_name: nv_pinned_memory_pool_total_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_pinned_memory_pool_used_bytes/gauge
        prometheus_name: nv_pinned_memory_pool_used_bytes
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_trt_llm_general_metrics/gauge
        prometheus_name: nv_trt_llm_general_metrics
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_trt_llm_inflight_batcher_metrics/gauge
        prometheus_name: nv_trt_llm_inflight_batcher_metrics
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_trt_llm_kv_cache_block_metrics/gauge
        prometheus_name: nv_trt_llm_kv_cache_block_metrics
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_trt_llm_request_metrics/gauge
        prometheus_name: nv_trt_llm_request_metrics
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/nv_trt_llm_runtime_memory_metrics/gauge
        prometheus_name: nv_trt_llm_runtime_memory_metrics
        kind: GAUGE
        value_type: DOUBLE
    install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/nvidia-triton