Path Lines of Code server/bounds-from-nix.py 27 server/custom_kernels/custom_kernels/fused_attention_cuda.cu 219 server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu 219 server/custom_kernels/setup.py 19 server/exllama_kernels/exllama_kernels/cu_compat.cuh 46 server/exllama_kernels/exllama_kernels/cuda_buffers.cu 62 server/exllama_kernels/exllama_kernels/cuda_buffers.cuh 40 server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cu 50 server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh 15 server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu 218 server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh 31 server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu 166 server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh 37 server/exllama_kernels/exllama_kernels/exllama_ext.cpp 198 server/exllama_kernels/exllama_kernels/hip_compat.cuh 45 server/exllama_kernels/exllama_kernels/matrix.cuh 250 server/exllama_kernels/exllama_kernels/tuning.h 9 server/exllama_kernels/exllama_kernels/util.cuh 25 server/exllama_kernels/setup.py 29 server/exllamav2_kernels/exllamav2_kernels/config.h 11 server/exllamav2_kernels/exllamav2_kernels/cpp/util.h 10 server/exllamav2_kernels/exllamav2_kernels/cuda/compat.cuh 45 server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh 104 server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu 198 server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh 31 server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh 507 server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh 231 server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu 544 server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh 57 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh 89 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_3.cuh 146 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh 195 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh 184 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh 33 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh 29 server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh 44 server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh 45 server/exllamav2_kernels/exllamav2_kernels/ext.cpp 115 server/exllamav2_kernels/setup.py 27 server/pyproject.toml 102 server/text_generation_server/__init__.py 1 server/text_generation_server/adapters/__init__.py 8 server/text_generation_server/adapters/config.py 19 server/text_generation_server/adapters/lora.py 385 server/text_generation_server/adapters/weights.py 101 server/text_generation_server/cache.py 24 server/text_generation_server/cli.py 301 server/text_generation_server/interceptor.py 32 server/text_generation_server/layers/__init__.py 30 server/text_generation_server/layers/attention/__init__.py 34 server/text_generation_server/layers/attention/common.py 42 server/text_generation_server/layers/attention/cuda.py 284 server/text_generation_server/layers/attention/flash_attn_triton.py 649 server/text_generation_server/layers/attention/flashinfer.py 163 server/text_generation_server/layers/attention/ipex.py 149 server/text_generation_server/layers/attention/kv_cache.py 265 server/text_generation_server/layers/attention/rocm.py 286 server/text_generation_server/layers/awq/conversion_utils.py 46 server/text_generation_server/layers/awq/quantize/__init__.py 6 server/text_generation_server/layers/awq/quantize/cuda.py 29 server/text_generation_server/layers/awq/quantize/ipex.py 39 server/text_generation_server/layers/bnb.py 93 server/text_generation_server/layers/compressed_tensors/__init__.py 2 server/text_generation_server/layers/compressed_tensors/loader.py 142 server/text_generation_server/layers/compressed_tensors/w8a8_int.py 196 server/text_generation_server/layers/compressed_tensors/w8an_fp.py 150 server/text_generation_server/layers/compressed_tensors/wna16_int.py 158 server/text_generation_server/layers/compressed_tensors/wna16_int_24.py 79 server/text_generation_server/layers/conv.py 33 server/text_generation_server/layers/eetq.py 37 server/text_generation_server/layers/exl2.py 52 server/text_generation_server/layers/fp8.py 452 server/text_generation_server/layers/gptq/__init__.py 401 server/text_generation_server/layers/gptq/custom_autotune.py 185 server/text_generation_server/layers/gptq/exllama.py 89 server/text_generation_server/layers/gptq/exllamav2.py 190 server/text_generation_server/layers/gptq/ipex.py 112 server/text_generation_server/layers/gptq/quantize.py 855 server/text_generation_server/layers/gptq/triton.py 314 server/text_generation_server/layers/gptq/utils.py 26 server/text_generation_server/layers/layernorm.py 159 server/text_generation_server/layers/linear.py 104 server/text_generation_server/layers/lora.py 201 server/text_generation_server/layers/marlin/__init__.py 14 server/text_generation_server/layers/marlin/fp8.py 99 server/text_generation_server/layers/marlin/gptq.py 390 server/text_generation_server/layers/marlin/marlin.py 279 server/text_generation_server/layers/marlin/util.py 104 server/text_generation_server/layers/medusa.py 144 server/text_generation_server/layers/mlp.py 214 server/text_generation_server/layers/moe/__init__.py 234 server/text_generation_server/layers/moe/fp8.py 149 server/text_generation_server/layers/moe/fused_moe_ipex.py 43 server/text_generation_server/layers/moe/gptq_marlin.py 289 server/text_generation_server/layers/moe/unquantized.py 202 server/text_generation_server/layers/rotary.py 494 server/text_generation_server/layers/speculative.py 44 server/text_generation_server/layers/tensor_parallel.py 196 server/text_generation_server/models/__init__.py 1742 server/text_generation_server/models/bloom.py 37 server/text_generation_server/models/causal_lm.py 713 server/text_generation_server/models/custom_modeling/__init__.py 1 server/text_generation_server/models/custom_modeling/bloom_modeling.py 652 server/text_generation_server/models/custom_modeling/clip.py 466 server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py 448 server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py 632 server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py 561 server/text_generation_server/models/custom_modeling/flash_deepseek_v3_modeling.py 569 server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py 476 server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py 724 server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py 393 server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py 372 server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py 327 server/text_generation_server/models/custom_modeling/flash_llama_modeling.py 581 server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py 457 server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py 434 server/text_generation_server/models/custom_modeling/flash_neox_modeling.py 339 server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py 101 server/text_generation_server/models/custom_modeling/flash_phi_modeling.py 354 server/text_generation_server/models/custom_modeling/flash_phi_moe_modeling.py 134 server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py 398 server/text_generation_server/models/custom_modeling/flash_rw_modeling.py 583 server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py 444 server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py 526 server/text_generation_server/models/custom_modeling/gemma3/configuration_gemma3.py 113 server/text_generation_server/models/custom_modeling/gemma3/image_processing_gemma3.py 300 server/text_generation_server/models/custom_modeling/gemma3/processing_gemma3.py 137 server/text_generation_server/models/custom_modeling/gemma3/utils.py 26 server/text_generation_server/models/custom_modeling/idefics2.py 678 server/text_generation_server/models/custom_modeling/idefics3.py 468 server/text_generation_server/models/custom_modeling/idefics_config.py 144 server/text_generation_server/models/custom_modeling/idefics_image_processing.py 145 server/text_generation_server/models/custom_modeling/idefics_modeling.py 1048 server/text_generation_server/models/custom_modeling/idefics_perceiver.py 166 server/text_generation_server/models/custom_modeling/idefics_processing.py 227 server/text_generation_server/models/custom_modeling/idefics_vision.py 347 server/text_generation_server/models/custom_modeling/llava_next.py 206 server/text_generation_server/models/custom_modeling/mamba_modeling.py 207 server/text_generation_server/models/custom_modeling/mllama.py 826 server/text_generation_server/models/custom_modeling/mpt_modeling.py 1105 server/text_generation_server/models/custom_modeling/neox_modeling.py 562 server/text_generation_server/models/custom_modeling/opt_modeling.py 610 server/text_generation_server/models/custom_modeling/phi_modeling.py 285 server/text_generation_server/models/custom_modeling/qwen2_5_vl.py 748 server/text_generation_server/models/custom_modeling/qwen2_vl.py 454 server/text_generation_server/models/custom_modeling/siglip.py 297 server/text_generation_server/models/custom_modeling/t5_modeling.py 934 server/text_generation_server/models/custom_modeling/vlm.py 55 server/text_generation_server/models/flash_causal_lm.py 2009 server/text_generation_server/models/galactica.py 104 server/text_generation_server/models/globals.py 57 server/text_generation_server/models/idefics_causal_lm.py 708 server/text_generation_server/models/mamba.py 667 server/text_generation_server/models/metadata_kernels.py 250 server/text_generation_server/models/mllama_causal_lm.py 330 server/text_generation_server/models/model.py 146 server/text_generation_server/models/seq2seq_lm.py 751 server/text_generation_server/models/transformers_flash_causal_lm.py 237 server/text_generation_server/models/transformers_flash_vlm.py 499 server/text_generation_server/models/types.py 89 server/text_generation_server/models/vlm_causal_lm.py 931 server/text_generation_server/server.py 270 server/text_generation_server/tracing.py 44 server/text_generation_server/utils/__init__.py 41 server/text_generation_server/utils/adapter.py 250 server/text_generation_server/utils/chunks.py 17 server/text_generation_server/utils/convert.py 82 server/text_generation_server/utils/dist.py 80 server/text_generation_server/utils/hub.py 174 server/text_generation_server/utils/import_utils.py 59 server/text_generation_server/utils/kernels.py 12 server/text_generation_server/utils/log.py 11 server/text_generation_server/utils/logits_process.py 412 server/text_generation_server/utils/merges/strategies.py 155 server/text_generation_server/utils/merges/utils.py 46 server/text_generation_server/utils/peft.py 59 server/text_generation_server/utils/prefill_chunking.py 15 server/text_generation_server/utils/quantization.py 181 server/text_generation_server/utils/segments.py 36 server/text_generation_server/utils/speculate.py 7 server/text_generation_server/utils/tokens.py 530 server/text_generation_server/utils/watermark.py 70 server/text_generation_server/utils/weights.py 290