# Source: integrations/tgi/prometheus_metadata.yaml

# Prometheus metadata for the TGI integration.
#
# NOTE(review): this layout was reconstructed from a copy whose line breaks and
# indentation had been collapsed. exporter_metadata, default_metrics and
# install_documentation_url are placed on the platform entry (siblings of
# detections) -- confirm against the consumer's schema.
platforms:
  - type: GKE
    launch_stage: GA
    detections:
      # Presumably the metric whose presence identifies this exporter --
      # TODO confirm. It is also listed under default_metrics below.
      - characteristic_metric:
          metric_type: prometheus.googleapis.com/tgi_request_count/counter
    exporter_metadata:
      name: TGI
      doc_url: https://github.com/huggingface/text-generation-inference
      # Kept quoted so the version is always read as a string.
      minimum_supported_version: "2.0.2"
    default_metrics:
      # Every entry follows the same pattern:
      #   name = prometheus.googleapis.com/<prometheus_name>/<suffix>
      # and the suffix lines up with the kind/value_type pair:
      #   /counter   -> CUMULATIVE + DOUBLE
      #   /histogram -> CUMULATIVE + DISTRIBUTION
      #   /gauge     -> GAUGE      + DOUBLE

      # tgi_batch_* metrics
      - name: prometheus.googleapis.com/tgi_batch_concat/counter
        prometheus_name: tgi_batch_concat
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_concat_duration/histogram
        prometheus_name: tgi_batch_concat_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_current_max_tokens/gauge
        prometheus_name: tgi_batch_current_max_tokens
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_current_size/gauge
        prometheus_name: tgi_batch_current_size
        kind: GAUGE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_decode_duration/histogram
        prometheus_name: tgi_batch_decode_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_filter_duration/histogram
        prometheus_name: tgi_batch_filter_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_forward_duration/histogram
        prometheus_name: tgi_batch_forward_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_inference_count/counter
        prometheus_name: tgi_batch_inference_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_batch_inference_duration/histogram
        prometheus_name: tgi_batch_inference_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_batch_inference_success/counter
        prometheus_name: tgi_batch_inference_success
        kind: CUMULATIVE
        value_type: DOUBLE

      # tgi_queue_* metrics
      - name: prometheus.googleapis.com/tgi_queue_size/gauge
        prometheus_name: tgi_queue_size
        kind: GAUGE
        value_type: DOUBLE

      # tgi_request_* metrics
      - name: prometheus.googleapis.com/tgi_request_count/counter
        prometheus_name: tgi_request_count
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_request_duration/histogram
        prometheus_name: tgi_request_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_generated_tokens/histogram
        prometheus_name: tgi_request_generated_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_inference_duration/histogram
        prometheus_name: tgi_request_inference_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_input_length/histogram
        prometheus_name: tgi_request_input_length
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_max_new_tokens/histogram
        prometheus_name: tgi_request_max_new_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_mean_time_per_token_duration/histogram
        prometheus_name: tgi_request_mean_time_per_token_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_queue_duration/histogram
        prometheus_name: tgi_request_queue_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_skipped_tokens/histogram
        prometheus_name: tgi_request_skipped_tokens
        kind: CUMULATIVE
        value_type: DISTRIBUTION
      - name: prometheus.googleapis.com/tgi_request_success/counter
        prometheus_name: tgi_request_success
        kind: CUMULATIVE
        value_type: DOUBLE
      - name: prometheus.googleapis.com/tgi_request_validation_duration/histogram
        prometheus_name: tgi_request_validation_duration
        kind: CUMULATIVE
        value_type: DISTRIBUTION
    install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/tgi