# integrations/vllm/prometheus_metadata.yaml
# Platforms on which this integration is offered. The entry below targets GKE
# and is marked with the GA launch stage.
platforms:
- type: GKE
launch_stage: GA
# Detection rule keyed on the vLLM prompt-token counter.
# NOTE(review): presumably the presence of this metric is what signals that
# the exporter is active; confirm against the consumer of this file.
detections:
- characteristic_metric:
metric_type: prometheus.googleapis.com/vllm:prompt_tokens_total/counter
# Descriptive metadata about the exporter: display name, upstream metrics
# documentation, and the oldest vLLM version this integration supports.
exporter_metadata:
name: vLLM Prometheus Exporter
# NOTE(review): doc_url points at the v0.6.1 documentation, while
# minimum_supported_version below is v0.6.2; confirm the mismatch is intended.
doc_url: https://docs.vllm.ai/en/v0.6.1/serving/metrics.html
minimum_supported_version: v0.6.2
# Metrics listed for this integration. Each entry carries four fields:
#   name            - metric type under the prometheus.googleapis.com/ prefix,
#                     ending in a type suffix such as /gauge
#   prometheus_name - the metric name without the domain prefix or type suffix
#   kind            - GAUGE or CUMULATIVE
#   value_type      - DOUBLE or DISTRIBUTION
# NOTE(review): prometheus_name omits the `vllm:` prefix that appears in
# `name`; confirm the consumer expects the unprefixed form.
default_metrics:
- name: prometheus.googleapis.com/vllm:avg_generation_throughput_toks_per_s/gauge
prometheus_name: avg_generation_throughput_toks_per_s
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:avg_prompt_throughput_toks_per_s/gauge
prometheus_name: avg_prompt_throughput_toks_per_s
kind: GAUGE
value_type: DOUBLE
# cache_config_info is listed three times with the same prometheus_name but
# different type suffixes: /gauge, /unknown and /unknown:counter.
# NOTE(review): the /unknown (GAUGE) and /unknown:counter (CUMULATIVE) pair
# presumably reflects how untyped Prometheus metrics are ingested; verify
# against the Managed Service for Prometheus documentation before removing
# any of the three.
- name: prometheus.googleapis.com/vllm:cache_config_info/gauge
prometheus_name: cache_config_info
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:cache_config_info/unknown
prometheus_name: cache_config_info
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:cache_config_info/unknown:counter
prometheus_name: cache_config_info
kind: CUMULATIVE
value_type: DOUBLE
# Metric entries follow. In every entry below, the type suffix of `name`
# lines up with the other fields as follows:
#   /gauge     -> kind: GAUGE,      value_type: DOUBLE
#   /counter   -> kind: CUMULATIVE, value_type: DOUBLE
#   /histogram -> kind: CUMULATIVE, value_type: DISTRIBUTION
# Keep that pairing when adding or editing entries.
- name: prometheus.googleapis.com/vllm:cpu_cache_usage_perc/gauge
prometheus_name: cpu_cache_usage_perc
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:cpu_prefix_cache_hit_rate/gauge
prometheus_name: cpu_prefix_cache_hit_rate
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:e2e_request_latency_seconds/histogram
prometheus_name: e2e_request_latency_seconds
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:generation_tokens_total/counter
prometheus_name: generation_tokens_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:gpu_cache_usage_perc/gauge
prometheus_name: gpu_cache_usage_perc
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:gpu_prefix_cache_hit_rate/gauge
prometheus_name: gpu_prefix_cache_hit_rate
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:num_preemptions_total/counter
prometheus_name: num_preemptions_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:num_requests_running/gauge
prometheus_name: num_requests_running
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:num_requests_swapped/gauge
prometheus_name: num_requests_swapped
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:num_requests_waiting/gauge
prometheus_name: num_requests_waiting
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:prompt_tokens_total/counter
prometheus_name: prompt_tokens_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:request_generation_tokens/histogram
prometheus_name: request_generation_tokens
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:request_params_best_of/histogram
prometheus_name: request_params_best_of
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:request_params_n/histogram
prometheus_name: request_params_n
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:request_prompt_tokens/histogram
prometheus_name: request_prompt_tokens
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:request_success_total/counter
prometheus_name: request_success_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:spec_decode_draft_acceptance_rate/gauge
prometheus_name: spec_decode_draft_acceptance_rate
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:spec_decode_efficiency/gauge
prometheus_name: spec_decode_efficiency
kind: GAUGE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:spec_decode_num_accepted_tokens_total/counter
prometheus_name: spec_decode_num_accepted_tokens_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:spec_decode_num_draft_tokens_total/counter
prometheus_name: spec_decode_num_draft_tokens_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:spec_decode_num_emitted_tokens_total/counter
prometheus_name: spec_decode_num_emitted_tokens_total
kind: CUMULATIVE
value_type: DOUBLE
- name: prometheus.googleapis.com/vllm:time_per_output_token_seconds/histogram
prometheus_name: time_per_output_token_seconds
kind: CUMULATIVE
value_type: DISTRIBUTION
- name: prometheus.googleapis.com/vllm:time_to_first_token_seconds/histogram
prometheus_name: time_to_first_token_seconds
kind: CUMULATIVE
value_type: DISTRIBUTION
# Link to the Google Cloud Managed Service for Prometheus page for the vLLM
# exporter, referenced as the installation documentation.
install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/vllm