# integrations/tgi/prometheus_metadata.yaml (100 lines of code) (raw):
# Prometheus integration metadata for TGI (Hugging Face
# text-generation-inference): one platform entry carrying the detection
# metric, exporter details, the default metric list and the install guide URL.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened to column 0. In that form the per-metric keys
# (`name`, `prometheus_name`, `kind`, `value_type`) collide as duplicate keys.
# The layout assumes every section belongs to the single GKE platform entry;
# confirm against the schema of whatever consumes this file.
platforms:
  - type: GKE
    launch_stage: GA
    # Presumably the metric whose presence identifies this integration;
    # verify against the consumer.
    detections:
      - characteristic_metric:
          metric_type: prometheus.googleapis.com/tgi_request_count/counter
    exporter_metadata:
      name: TGI
      doc_url: https://github.com/huggingface/text-generation-inference
      # Quoted so the version is always read as a string.
      minimum_supported_version: "2.0.2"
    # In every entry below, `name` is `prometheus_name` wrapped as
    #   prometheus.googleapis.com/<prometheus_name>/<counter|gauge|histogram>
    # and the suffix lines up with the declared kind/value_type:
    #   counter   -> CUMULATIVE / DOUBLE
    #   gauge     -> GAUGE      / DOUBLE
    #   histogram -> CUMULATIVE / DISTRIBUTION
    default_metrics:
      - name: prometheus.googleapis.com/tgi_batch_concat/counter
        prometheus_name: tgi_batch_concat
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_concat_duration/histogram
        prometheus_name: tgi_batch_concat_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_current_max_tokens/gauge
        prometheus_name: tgi_batch_current_max_tokens
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_current_size/gauge
        prometheus_name: tgi_batch_current_size
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_decode_duration/histogram
        prometheus_name: tgi_batch_decode_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_filter_duration/histogram
        prometheus_name: tgi_batch_filter_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_forward_duration/histogram
        prometheus_name: tgi_batch_forward_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_inference_count/counter
        prometheus_name: tgi_batch_inference_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_inference_duration/histogram
        prometheus_name: tgi_batch_inference_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_inference_success/counter
        prometheus_name: tgi_batch_inference_success
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_queue_size/gauge
        prometheus_name: tgi_queue_size
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_request_count/counter
        prometheus_name: tgi_request_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_request_duration/histogram
        prometheus_name: tgi_request_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_generated_tokens/histogram
        prometheus_name: tgi_request_generated_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_inference_duration/histogram
        prometheus_name: tgi_request_inference_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_input_length/histogram
        prometheus_name: tgi_request_input_length
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_max_new_tokens/histogram
        prometheus_name: tgi_request_max_new_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_mean_time_per_token_duration/histogram
        prometheus_name: tgi_request_mean_time_per_token_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_queue_duration/histogram
        prometheus_name: tgi_request_queue_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_skipped_tokens/histogram
        prometheus_name: tgi_request_skipped_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_success/counter
        prometheus_name: tgi_request_success
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_request_validation_duration/histogram
        prometheus_name: tgi_request_validation_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
    install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/tgi