Path Lines of Code CODE_OF_CONDUCT.md 97 CONTRIBUTING.md 80 README.md 252 assets/tgi_grafana.json 3999 backends/gaudi/README.md 122 backends/gaudi/examples/docker_commands/docker_commands.md 245 backends/gaudi/server/README.md 10 backends/gaudi/server/requirements.txt 86 backends/llamacpp/README.md 18 backends/llamacpp/requirements.txt 4 backends/neuron/README.md 16 backends/neuron/server/build-requirements.txt 3 backends/neuron/tests/pytest.ini 2 backends/neuron/tests/requirements.txt 19 backends/trtllm/CMakeLists.txt 97 backends/trtllm/README.md 41 benchmark/README.md 19 clients/python/README.md 230 crate-hashes.json 3 integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json 129 integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json 99 integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json 514 integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json 129 integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json 514 integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json 26 integration-tests/models/__snapshots__/test_completion_prompts/test_chat_hfhub_nousage.json 62 integration-tests/models/__snapshots__/test_completion_prompts/test_chat_hfhub_usage.json 75 integration-tests/models/__snapshots__/test_completion_prompts/test_chat_openai_nousage.json 71 integration-tests/models/__snapshots__/test_completion_prompts/test_chat_openai_usage.json 87 integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json 38 integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json 666 integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json 20 integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_stream_usage.json 215 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_all_params.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int/test_compressed_tensors_w8a8_int_load.json 294 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight.json 469 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_all_params.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_w8a8_int_dynamic_weight/test_compressed_tensors_w8a8_int_dynamic_weight_load.json 294 integration-tests/models/__snapshots__/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_all_params.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_w8an_fp/test_compressed_tensors_w8an_load.json 294 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_all_params.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int/test_compressed_tensors_wna16_load.json 294 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_all_params.json 73 integration-tests/models/__snapshots__/test_compressed_tensors_wna16_int_24/test_compressed_tensors_wna16_int_24_load.json 294 integration-tests/models/__snapshots__/test_continue_final_message/test_llama_completion_single_prompt.json 23 integration-tests/models/__snapshots__/test_continue_final_message/test_llama_completion_single_prompt_continue.json 23 integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq.json 73 integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_all_params.json 73 integration-tests/models/__snapshots__/test_flash_awq/test_flash_llama_awq_load.json 294 integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json 294 integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_sharded.json 73 integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json 73 integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_all_params.json 37 integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json 294 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json 72 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json 72 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json 290 integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json 73 integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json 294 integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_simple.json 73 integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2.json 73 integration-tests/models/__snapshots__/test_flash_gemma2/test_flash_gemma2_load.json 294 integration-tests/models/__snapshots__/test_flash_gemma3/test_exceed_window.json 109 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3.json 613 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json 26 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json 26 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json 26 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow.json 26 integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json 26 integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq.json 73 integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_all_params.json 73 integration-tests/models/__snapshots__/test_flash_gemma_gptq/test_flash_gemma_gptq_load.json 294 integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2.json 73 integration-tests/models/__snapshots__/test_flash_gpt2/test_flash_gpt2_load.json 294 integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json 73 integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json 193 integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json 294 integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json 73 integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json 73 integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_all_params.json 43 integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_load.json 294 integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_simple.json 73 integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2.json 73 integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_exl2/test_flash_llama_exl2_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8.json 73 integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache.json 73 integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json 73 integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin.json 73 integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_marlin/test_flash_llama_marlin_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin.json 73 integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_all_params.json 73 integration-tests/models/__snapshots__/test_flash_llama_marlin_24/test_flash_llama_marlin24_load.json 294 integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json 2550 integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json 2550 integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json 72 integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json 290 integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json 72 integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json 73 integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json 73 integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json 294 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json 73 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json 73 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json 294 integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq.json 73 integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json 73 integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json 294 integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq.json 73 integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_all_params.json 73 integration-tests/models/__snapshots__/test_flash_mixtral_gptq/test_flash_mixtral_gptq_load.json 294 integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox.json 72 integration-tests/models/__snapshots__/test_flash_neox/test_flash_neox_load.json 290 integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox.json 72 integration-tests/models/__snapshots__/test_flash_neox_sharded/test_flash_neox_load.json 290 integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma.json 25 integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json 61 integration-tests/models/__snapshots__/test_flash_pali_gemma2/test_flash_pali_gemma_image.json 133 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json 73 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json 49 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json 294 integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe.json 73 integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_all_params.json 73 integration-tests/models/__snapshots__/test_flash_phi35_moe/test_flash_phi35_moe_load.json 294 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json 73 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json 73 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json 294 integration-tests/models/__snapshots__/test_flash_qwen2_5_vl/test_flash_qwen2_5_vl_bay.json 26 integration-tests/models/__snapshots__/test_flash_qwen2_5_vl/test_flash_qwen2_5_vl_simple.json 26 integration-tests/models/__snapshots__/test_flash_qwen2_5_vl/test_flash_qwen2_5_vl_simple_streaming.json 20 integration-tests/models/__snapshots__/test_flash_qwen2_vl/test_flash_qwen2_vl_bay.json 26 integration-tests/models/__snapshots__/test_flash_qwen2_vl/test_flash_qwen2_vl_inpaint.json 26 integration-tests/models/__snapshots__/test_flash_qwen2_vl/test_flash_qwen2_vl_simple.json 26 integration-tests/models/__snapshots__/test_flash_qwen2_vl/test_flash_qwen2_vl_simple_streaming.json 20 integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder.json 72 integration-tests/models/__snapshots__/test_flash_santacoder/test_flash_santacoder_load.json 290 integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder.json 72 integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_default_params.json 373 integration-tests/models/__snapshots__/test_flash_starcoder/test_flash_starcoder_load.json 290 integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json 73 integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json 373 integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json 294 integration-tests/models/__snapshots__/test_flash_starcoder2_lora/test_flash_starcoder2.json 73 integration-tests/models/__snapshots__/test_flash_starcoder2_lora/test_flash_starcoder2_default_params.json 373 integration-tests/models/__snapshots__/test_flash_starcoder2_lora/test_flash_starcoder2_load.json 294 integration-tests/models/__snapshots__/test_flash_starcoder2_lora/test_flash_starcoder2_with_hugcode_adapter.json 71 integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json 25 integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json 25 integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json 102 integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json 193 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.1.json 23 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.2.json 23 integration-tests/models/__snapshots__/test_grammar_response_format_llama/test_grammar_response_format_llama_json.json 23 integration-tests/models/__snapshots__/test_idefics/test_idefics.json 168 integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json 674 integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json 91 integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json 73 integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json 294 integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json 73 integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json 127 integration-tests/models/__snapshots__/test_idefics3/test_flash_idefics3_next_simple_url.json 67 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_basic.json 23 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_complex.json 23 integration-tests/models/__snapshots__/test_json_schema_constrain/test_json_schema_stream.json 743 integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json 49 integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json 294 integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json 73 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_customer_support_adapter.json 251 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_with_dbpedia_adapter.json 53 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_adapter.json 251 integration-tests/models/__snapshots__/test_lora_mistral/test_lora_mistral_without_customer_support_adapter.json 251 integration-tests/models/__snapshots__/test_mamba/test_mamba.json 73 integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json 99 integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json 398 integration-tests/models/__snapshots__/test_mllama/test_mllama_load.json 54 integration-tests/models/__snapshots__/test_mllama/test_mllama_simpl.json 26 integration-tests/models/__snapshots__/test_mpt/test_mpt.json 140 integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json 562 integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json 48 integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json 79 integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json 218 integration-tests/models/__snapshots__/test_neox/test_neox.json 72 integration-tests/models/__snapshots__/test_neox/test_neox_load.json 290 integration-tests/models/__snapshots__/test_neox_sharded/test_neox.json 72 integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json 290 integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized.json 73 integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_all_params.json 73 integration-tests/models/__snapshots__/test_server_gptq_quantized/test_server_gptq_quantized_load.json 294 integration-tests/models/__snapshots__/test_smolvlm/test_flash_smolvlm_next_simple_url.json 61 integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json 60 integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json 242 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto_nostream.json 34 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_nostream.json 34 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice_stream.json 512 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_nostream.json 24 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information_stream.json 402 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_nostream.json 34 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_openai.json 563 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_auto.json 402 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json 1 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_none.json 2002 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json 1 integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_tool_reply_response.json 24 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4.json 613 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_jpg.json 26 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgb_png.json 26 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_base64_rgba.json 26 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow.json 26 integration-tests/models/__snapshots__/test_transformers_llama4/test_flash_llama4_image_cow_dog.json 26 integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_load.json 294 integration-tests/models/__snapshots__/test_transformers_olmo/test_flash_llama_simple.json 73 integration-tests/pytest.ini 5 integration-tests/requirements.txt 120 router/README.md 65 server/README.md 10 server/req.txt 212 server/requirements_cuda.txt 390 server/requirements_gen.txt 191 server/requirements_intel.txt 386 server/requirements_rocm.txt 386