Path Lines of Code backends/client/Cargo.toml 20 backends/client/build.rs 29 backends/client/src/lib.rs 68 backends/client/src/v2/client.rs 208 backends/client/src/v2/mod.rs 11 backends/client/src/v2/sharded_client.rs 202 backends/client/src/v3/client.rs 247 backends/client/src/v3/mod.rs 11 backends/client/src/v3/sharded_client.rs 217 backends/gaudi/server/pyproject.toml 38 backends/gaudi/server/text_generation_server/__init__.py 1 backends/gaudi/server/text_generation_server/adapters/__init__.py 8 backends/gaudi/server/text_generation_server/adapters/config.py 19 backends/gaudi/server/text_generation_server/adapters/lora.py 370 backends/gaudi/server/text_generation_server/adapters/weights.py 101 backends/gaudi/server/text_generation_server/cache.py 24 backends/gaudi/server/text_generation_server/cli.py 297 backends/gaudi/server/text_generation_server/interceptor.py 35 backends/gaudi/server/text_generation_server/layers/__init__.py 32 backends/gaudi/server/text_generation_server/layers/attention/__init__.py 30 backends/gaudi/server/text_generation_server/layers/attention/common.py 71 backends/gaudi/server/text_generation_server/layers/attention/hpu.py 178 backends/gaudi/server/text_generation_server/layers/attention/kv_cache.py 138 backends/gaudi/server/text_generation_server/layers/awq/conversion_utils.py 46 backends/gaudi/server/text_generation_server/layers/awq/quantize/__init__.py 2 backends/gaudi/server/text_generation_server/layers/awq/quantize/hpu.py 99 backends/gaudi/server/text_generation_server/layers/bnb.py 93 backends/gaudi/server/text_generation_server/layers/compressed_tensors/__init__.py 2 backends/gaudi/server/text_generation_server/layers/compressed_tensors/loader.py 115 backends/gaudi/server/text_generation_server/layers/compressed_tensors/w8an_fp.py 209 backends/gaudi/server/text_generation_server/layers/conv.py 33 backends/gaudi/server/text_generation_server/layers/exl2.py 52 backends/gaudi/server/text_generation_server/layers/fp8.py 528 backends/gaudi/server/text_generation_server/layers/gptq/__init__.py 371 backends/gaudi/server/text_generation_server/layers/gptq/hpu.py 163 backends/gaudi/server/text_generation_server/layers/gptq/quantize.py 855 backends/gaudi/server/text_generation_server/layers/gptq/utils.py 26 backends/gaudi/server/text_generation_server/layers/layernorm.py 45 backends/gaudi/server/text_generation_server/layers/linear.py 28 backends/gaudi/server/text_generation_server/layers/lora.py 197 backends/gaudi/server/text_generation_server/layers/medusa.py 144 backends/gaudi/server/text_generation_server/layers/mlp.py 214 backends/gaudi/server/text_generation_server/layers/moe/__init__.py 201 backends/gaudi/server/text_generation_server/layers/moe/fp8.py 240 backends/gaudi/server/text_generation_server/layers/moe/fused_moe.py 97 backends/gaudi/server/text_generation_server/layers/moe/unquantized.py 113 backends/gaudi/server/text_generation_server/layers/rotary.py 507 backends/gaudi/server/text_generation_server/layers/speculative.py 44 backends/gaudi/server/text_generation_server/layers/tensor_parallel.py 184 backends/gaudi/server/text_generation_server/models/__init__.py 984 backends/gaudi/server/text_generation_server/models/custom_modeling/__init__.py 1 backends/gaudi/server/text_generation_server/models/custom_modeling/bloom_modeling.py 652 backends/gaudi/server/text_generation_server/models/custom_modeling/clip.py 466 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py 422 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py 614 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py 543 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_deepseek_v3_modeling.py 604 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py 492 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py 629 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py 401 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py 366 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py 330 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_llama4_modeling.py 1116 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py 555 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_llava_next.py 201 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py 422 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py 422 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mllama.py 748 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py 344 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py 98 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py 363 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_phi_moe_modeling.py 134 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py 333 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_modeling.py 302 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_qwen3_moe_modeling.py 405 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py 578 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py 442 backends/gaudi/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py 515 backends/gaudi/server/text_generation_server/models/custom_modeling/idefics2.py 677 backends/gaudi/server/text_generation_server/models/custom_modeling/idefics3.py 467 backends/gaudi/server/text_generation_server/models/custom_modeling/mamba_modeling.py 207 backends/gaudi/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py 724 backends/gaudi/server/text_generation_server/models/custom_modeling/qwen2_vl.py 429 backends/gaudi/server/text_generation_server/models/custom_modeling/siglip.py 297 backends/gaudi/server/text_generation_server/models/custom_modeling/vlm.py 55 backends/gaudi/server/text_generation_server/models/flash_causal_lm.py 2113 backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py 856 backends/gaudi/server/text_generation_server/models/globals.py 33 backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py 546 backends/gaudi/server/text_generation_server/models/model.py 111 backends/gaudi/server/text_generation_server/models/seq2seq_lm.py 737 backends/gaudi/server/text_generation_server/models/types.py 82 backends/gaudi/server/text_generation_server/server.py 265 backends/gaudi/server/text_generation_server/tgi_service.py 46 backends/gaudi/server/text_generation_server/tracing.py 44 backends/gaudi/server/text_generation_server/utils/__init__.py 47 backends/gaudi/server/text_generation_server/utils/adapter.py 243 backends/gaudi/server/text_generation_server/utils/chunks.py 17 backends/gaudi/server/text_generation_server/utils/convert.py 82 backends/gaudi/server/text_generation_server/utils/debug.py 29 backends/gaudi/server/text_generation_server/utils/dist.py 49 backends/gaudi/server/text_generation_server/utils/hub.py 174 backends/gaudi/server/text_generation_server/utils/import_utils.py 11 backends/gaudi/server/text_generation_server/utils/kernels.py 12 backends/gaudi/server/text_generation_server/utils/log.py 11 backends/gaudi/server/text_generation_server/utils/logits_process.py 402 backends/gaudi/server/text_generation_server/utils/merges/strategies.py 155 backends/gaudi/server/text_generation_server/utils/merges/utils.py 46 backends/gaudi/server/text_generation_server/utils/peft.py 59 backends/gaudi/server/text_generation_server/utils/prefill_chunking.py 15 backends/gaudi/server/text_generation_server/utils/quantization.py 137 backends/gaudi/server/text_generation_server/utils/segments.py 38 backends/gaudi/server/text_generation_server/utils/sgmv.py 159 backends/gaudi/server/text_generation_server/utils/speculate.py 7 backends/gaudi/server/text_generation_server/utils/tokens.py 634 backends/gaudi/server/text_generation_server/utils/version.py 27 backends/gaudi/server/text_generation_server/utils/watermark.py 70 backends/gaudi/server/text_generation_server/utils/weights.py 295 backends/grpc-metadata/Cargo.toml 9 backends/grpc-metadata/src/lib.rs 32 backends/llamacpp/Cargo.toml 20 backends/llamacpp/build.rs 43 backends/llamacpp/src/backend.rs 614 backends/llamacpp/src/llamacpp.rs 5 backends/llamacpp/src/main.rs 266 backends/llamacpp/src/quantize.rs 30 backends/neuron/Cargo.toml 42 backends/neuron/server/pyproject.toml 23 backends/neuron/server/text_generation_server/cli.py 69 backends/neuron/server/text_generation_server/generator.py 501 backends/neuron/server/text_generation_server/interceptor.py 25 backends/neuron/server/text_generation_server/model.py 99 backends/neuron/server/text_generation_server/server.py 69 backends/neuron/server/text_generation_server/tgi_env.py 229 backends/neuron/tgi_entry_point.py 34 backends/trtllm/Cargo.toml 23 backends/trtllm/build.rs 203 backends/trtllm/cmake/json.cmake 6 backends/trtllm/cmake/spdlog.cmake 15 backends/trtllm/cmake/trtllm.cmake 40 backends/trtllm/cmake/utils/detect_cuda_arch.cu 1 backends/trtllm/csrc/backend.cpp 59 backends/trtllm/csrc/backend.hpp 133 backends/trtllm/csrc/ffi.hpp 156 backends/trtllm/csrc/hardware.hpp 38 backends/trtllm/scripts/setup_sccache.py 37 backends/trtllm/src/errors.rs 20 backends/trtllm/src/lib.rs 67 backends/trtllm/src/looper.rs 281 backends/trtllm/src/main.rs 296 backends/trtllm/src/utils.rs 4 backends/v2/Cargo.toml 70 backends/v2/build.rs 15 backends/v2/src/backend.rs 402 backends/v2/src/client/grpc_client.rs 208 backends/v2/src/client/mod.rs 52 backends/v2/src/client/sharded_client.rs 203 backends/v2/src/lib.rs 118 backends/v2/src/main.rs 203 backends/v2/src/queue.rs 527 backends/v3/Cargo.toml 77 backends/v3/benches/prefix_cache.rs 29 backends/v3/build.rs 15 backends/v3/src/backend.rs 450 backends/v3/src/block_allocator.rs 193 backends/v3/src/client/grpc_client.rs 255 backends/v3/src/client/mod.rs 50 backends/v3/src/client/sharded_client.rs 207 backends/v3/src/lib.rs 159 backends/v3/src/main.rs 217 backends/v3/src/queue.rs 670 backends/v3/src/radix.rs 741