Path Lines of Code bazel/arch_select.bzl 121 bazel/bundle.bzl 519 bazel/defs.bzl 251 bazel/py_proto.bzl 41 bazel/pyc_wheel.py 129 bazel/tf_http_archive.bzl 260 bazel/tf_proto.bzl 398 bazel/upload_package.py 48 benchmark/benchmark_serving.py 223 def.bzl 193 example/perf_test/defs.bzl 140 maga_transformer/__init__.py 46 maga_transformer/_ft_pickler.py 225 maga_transformer/access_logger/access_logger.py 60 maga_transformer/access_logger/json_util.py 13 maga_transformer/access_logger/log_utils.py 16 maga_transformer/access_logger/py_access_log.py 52 maga_transformer/aios/kmonitor/python_client/flume/ThriftSourceProtocol.py 314 maga_transformer/aios/kmonitor/python_client/flume/__init__.py 1 maga_transformer/aios/kmonitor/python_client/flume/pyflume.py 56 maga_transformer/aios/kmonitor/python_client/flume/ttypes.py 92 maga_transformer/aios/kmonitor/python_client/kmonitor/kmonitor.py 38 maga_transformer/aios/kmonitor/python_client/kmonitor/metrics/acc_metric.py 40 maga_transformer/aios/kmonitor/python_client/kmonitor/metrics/gauge_metric.py 19 maga_transformer/aios/kmonitor/python_client/kmonitor/metrics/metric_base.py 17 maga_transformer/aios/kmonitor/python_client/kmonitor/metrics/metric_factory.py 14 maga_transformer/aios/kmonitor/python_client/kmonitor/qps_metric.py 25 maga_transformer/aios/kmonitor/python_client/kmonitor/report_worker.py 94 maga_transformer/aios/kmonitor/python_client/kmonitor/utils/hippo_helper.py 36 maga_transformer/async_decoder_engine/async_model.py 62 maga_transformer/async_decoder_engine/backend_rpc_server_visitor.py 24 maga_transformer/async_decoder_engine/base_engine.py 25 maga_transformer/async_decoder_engine/embedding/embedding_engine.py 47 maga_transformer/async_decoder_engine/embedding/interface.py 30 maga_transformer/async_decoder_engine/engine_creator.py 26 maga_transformer/async_decoder_engine/rpc_engine.py 50 maga_transformer/config/base_model_config.py 6 maga_transformer/config/exceptions.py 63 maga_transformer/config/generate_config.py 182 maga_transformer/config/gpt_init_model_parameters.py 641 maga_transformer/config/log_config.py 28 maga_transformer/config/task_type.py 55 maga_transformer/config/uvicorn_config.py 23 maga_transformer/cpp/api_server/AccessLogWrapper.cc 248 maga_transformer/cpp/api_server/AccessLogWrapper.h 26 maga_transformer/cpp/api_server/ApiServerMetrics.cc 146 maga_transformer/cpp/api_server/ApiServerMetrics.h 59 maga_transformer/cpp/api_server/ChatService.cc 186 maga_transformer/cpp/api_server/ChatService.h 64 maga_transformer/cpp/api_server/ConcurrencyControllerUtil.h 59 maga_transformer/cpp/api_server/EmbeddingEndpoint.cc 91 maga_transformer/cpp/api_server/EmbeddingEndpoint.h 34 maga_transformer/cpp/api_server/EmbeddingService.cc 117 maga_transformer/cpp/api_server/EmbeddingService.h 33 maga_transformer/cpp/api_server/ErrorResponse.cc 13 maga_transformer/cpp/api_server/ErrorResponse.h 14 maga_transformer/cpp/api_server/Exception.h 147 maga_transformer/cpp/api_server/GangServer.cc 109 maga_transformer/cpp/api_server/GangServer.h 25 maga_transformer/cpp/api_server/GenerateStreamWrapper.cc 127 maga_transformer/cpp/api_server/GenerateStreamWrapper.h 37 maga_transformer/cpp/api_server/HttpApiServer.cc 354 maga_transformer/cpp/api_server/HttpApiServer.h 127 maga_transformer/cpp/api_server/InferenceDataType.h 142 maga_transformer/cpp/api_server/InferenceService.cc 321 maga_transformer/cpp/api_server/InferenceService.h 78 maga_transformer/cpp/api_server/LogLevelOps.cc 23 maga_transformer/cpp/api_server/LogLevelOps.h 5 maga_transformer/cpp/api_server/LoraService.cc 227 maga_transformer/cpp/api_server/LoraService.h 52 maga_transformer/cpp/api_server/ModelStatusService.cc 27 maga_transformer/cpp/api_server/ModelStatusService.h 14 maga_transformer/cpp/api_server/ParallelInfo.h 90 maga_transformer/cpp/api_server/SysCmdService.cc 37 maga_transformer/cpp/api_server/SysCmdService.h 13 maga_transformer/cpp/api_server/TokenProcessor.cc 117 maga_transformer/cpp/api_server/TokenProcessor.h 40 maga_transformer/cpp/api_server/TokenizerEncodeResponse.h 28 maga_transformer/cpp/api_server/TokenizerService.cc 85 maga_transformer/cpp/api_server/TokenizerService.h 27 maga_transformer/cpp/api_server/WeightsLoader.cc 21 maga_transformer/cpp/api_server/WeightsLoader.h 14 maga_transformer/cpp/api_server/WorkerStatusService.cc 47 maga_transformer/cpp/api_server/WorkerStatusService.h 45 maga_transformer/cpp/api_server/common/HealthService.cc 51 maga_transformer/cpp/api_server/common/HealthService.h 21 maga_transformer/cpp/core/Buffer.cc 159 maga_transformer/cpp/core/Buffer.h 154 maga_transformer/cpp/core/BufferHelper.h 51 maga_transformer/cpp/core/Event.h 18 maga_transformer/cpp/core/MemoryTracker.cc 127 maga_transformer/cpp/core/MemoryTracker.h 42 maga_transformer/cpp/core/QBuffer.cc 129 maga_transformer/cpp/core/QBuffer.h 72 maga_transformer/cpp/core/TrackerAllocator.cc 132 maga_transformer/cpp/core/TrackerAllocator.h 30 maga_transformer/cpp/core/Types.cc 129 maga_transformer/cpp/core/Types.h 78 maga_transformer/cpp/core/allocator.cc 6 maga_transformer/cpp/core/allocator.h 40 maga_transformer/cpp/core/cpu_allocator.cc 17 maga_transformer/cpp/core/cpu_allocator.h 18 maga_transformer/cpp/core/torch_utils/BufferTorchUtils.h 205 maga_transformer/cpp/core/torch_utils/torch_cuda_allocator.cc 146 maga_transformer/cpp/core/torch_utils/torch_cuda_allocator.h 78 maga_transformer/cpp/cuda/Dispatch.h 208 maga_transformer/cpp/cuda/ExpertAttentionUtil.cc 68 maga_transformer/cpp/cuda/ExpertAttentionUtil.h 180 maga_transformer/cpp/cuda/allocator_cuda.cc 145 maga_transformer/cpp/cuda/allocator_cuda.h 69 maga_transformer/cpp/cuda/allocator_torch.cc 39 maga_transformer/cpp/cuda/allocator_torch.h 38 maga_transformer/cpp/cuda/comm_buffer/comm_buffer.cc 145 maga_transformer/cpp/cuda/comm_buffer/comm_buffer.h 44 maga_transformer/cpp/cuda/cublas/cublas.h 3 maga_transformer/cpp/cuda/cublas/cublasAlgoMap.cc 163 maga_transformer/cpp/cuda/cublas/cublasAlgoMap.h 74 maga_transformer/cpp/cuda/cublas/cublasFP8MMWrapper.cc 882 maga_transformer/cpp/cuda/cublas/cublasFP8MMWrapper.h 144 maga_transformer/cpp/cuda/cublas/cublasMMWrapper.cc 912 maga_transformer/cpp/cuda/cublas/cublasMMWrapper.h 236 maga_transformer/cpp/cuda/cuda_bf16_fallbacks.cuh 277 maga_transformer/cpp/cuda/cuda_fmha_utils.h 162 maga_transformer/cpp/cuda/cuda_fp8_utils.cu 494 maga_transformer/cpp/cuda/cuda_fp8_utils.h 189 maga_transformer/cpp/cuda/cuda_type_utils.cuh 378 maga_transformer/cpp/cuda/cuda_utils.cc 683 maga_transformer/cpp/cuda/cuda_utils.h 416 maga_transformer/cpp/cuda/cufmha/cufmha.cc 440 maga_transformer/cpp/cuda/cufmha/cufmha.h 149 maga_transformer/cpp/cuda/cufmha/fmha_profiling_interface.h 183 maga_transformer/cpp/cuda/cuggemm/cuggemm.cc 23 maga_transformer/cpp/cuda/cuggemm/cuggemm.h 36 maga_transformer/cpp/cuda/custom_ar/custom_ar_comm.cc 244 maga_transformer/cpp/cuda/custom_ar/custom_ar_comm.h 50 maga_transformer/cpp/cuda/memory_utils.cu 753 maga_transformer/cpp/cuda/memory_utils.h 98 maga_transformer/cpp/cuda/nccl/nccl_utils.cc 343 maga_transformer/cpp/cuda/nccl/nccl_utils.h 85 maga_transformer/cpp/cuda/nccl/nccl_utils_torch.cc 62 maga_transformer/cpp/cuda/nccl/nccl_utils_torch.h 23 maga_transformer/cpp/cuda/nvtx/kernel_profiler.cc 39 maga_transformer/cpp/cuda/nvtx/kernel_profiler.h 28 maga_transformer/cpp/cuda/nvtx/nvtx_utils.cc 74 maga_transformer/cpp/cuda/nvtx/nvtx_utils.h 62 maga_transformer/cpp/cuda/quantize_utils.h 93 maga_transformer/cpp/cuda/reduce_kernel_utils.cuh 338 maga_transformer/cpp/cuda/trt_utils.h 107 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/arch/copy_red_global.hpp 205 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/arch/mma.h 92 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/compute_occupancy.h 59 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/epilogue/collective/epilogue_moe_finalize.hpp 410 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/epilogue/thread/fused_activations.h 53 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_per_row_per_col_scale.h 258 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/epilogue/threadblock/epilogue_tensor_op_int32.h 145 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/epilogue_helpers.h 92 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/device/gemm_universal_base_compat.h 241 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/device/splitk_gemm_grouped.h 357 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/default_fpA_intB_traits.h 90 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/default_int8_traits.h 33 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/default_splitk_gemm_grouped.h 91 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/fpA_intB_gemm.h 375 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/fused_moe_kernel.cuh 196 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/fused_moe_kernel_routine.cuh 717 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/fused_moe_kernel_traits.cuh 188 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/gemm_moe_problem_visitor.h 34 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/gemm_with_epilogue_visitor.h 353 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/group_gemm_traits.h 31 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/mixed_gemm_B_layout.h 69 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/moe_cute_util.cuh 161 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/moe_cutlass_kernel.h 395 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/moe_problem_visitor.h 225 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/moe_sm90_traits.h 21 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/kernel/splitk_gemm_grouped.h 301 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma.h 87 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_multistage.h 159 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_dq_mma_pipelined.h 136 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma.h 152 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/default_mma_bf16.h 189 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_base.h 112 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_multistage.h 41 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_multistage_finegrained.h 409 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_multistage_percol.h 364 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_pipelined.h 229 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/threadblock/dq_mma_pipelined_finegrained.h 261 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/warp/default_mma_tensor_op.h 41 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_compute_B_with_f16.h 156 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/gemm/warp/mma_tensorop_dequantizer.h 408 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h 298 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/tile_interleaved_layout.h 27 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/transform/threadblock/fine_grained_scale_zero_iterator.h 143 maga_transformer/cpp/cutlass/cutlass_extensions/include/cutlass_extensions/util/gather_tensor.hpp 118 maga_transformer/cpp/cutlass/cutlass_kernels/cutlass_heuristic.cc 553 maga_transformer/cpp/cutlass/cutlass_kernels/cutlass_heuristic.h 56 maga_transformer/cpp/cutlass/cutlass_kernels/cutlass_preprocessors.cc 538 maga_transformer/cpp/cutlass/cutlass_kernels/cutlass_preprocessors.h 33 maga_transformer/cpp/cutlass/cutlass_kernels/cutlass_type_conversion.h 86 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int4_gemm_fg_scalebias.cu 29 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int4_gemm_fg_scaleonly.cu 29 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int4_gemm_per_col.cu 29 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int8_gemm_fg_scalebias.cu 29 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int8_gemm_fg_scaleonly.cu 28 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/bf16_int8_gemm_per_col.cu 28 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int4_gemm_fg_scalebias.cu 27 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int4_gemm_fg_scaleonly.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int4_gemm_per_col.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int8_gemm_fg_scalebias.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int8_gemm_fg_scaleonly.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fp16_int8_gemm_per_col.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm.h 90 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_dummy_stubs.cu 31 maga_transformer/cpp/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.h 494 maga_transformer/cpp/cutlass/cutlass_kernels/gemm_configs.h 118 maga_transformer/cpp/cutlass/cutlass_kernels/gemm_lut.cc 27 maga_transformer/cpp/cutlass/cutlass_kernels/gemm_lut.h 58 maga_transformer/cpp/cutlass/cutlass_kernels/gemm_lut_utils.h 132 maga_transformer/cpp/cutlass/cutlass_kernels/group_gemm/group_gemm.h 23 maga_transformer/cpp/cutlass/cutlass_kernels/group_gemm/group_gemm_bf16.cu 4 maga_transformer/cpp/cutlass/cutlass_kernels/group_gemm/group_gemm_fp16.cu 4 maga_transformer/cpp/cutlass/cutlass_kernels/group_gemm/group_gemm_fp32.cu 4 maga_transformer/cpp/cutlass/cutlass_kernels/group_gemm/group_gemm_template.h 230 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm.h 76 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm_bf16.cu 28 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm_fp16.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm_fp32.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm_int32.cu 26 maga_transformer/cpp/cutlass/cutlass_kernels/int8_gemm/int8_gemm_template.h 406 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/launchers/fused_moe_gemm_launcher_sm80.h 9 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/launchers/fused_moe_gemm_launcher_sm80.inl 69 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/launchers/moe_gemm_launcher_sm90.h 16 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/launchers/moe_gemm_launcher_sm90.inl 247 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_fp8_kernels.cu 418 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_fp8_kernels.h 53 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_gemm_hopper_input.cu 114 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_gemm_kernels.h 167 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_gemm_kernels_template.h 878 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_gemm_kernels_template_sm90.h 152 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_kernels.cu 1245 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_kernels.h 483 maga_transformer/cpp/cutlass/cutlass_kernels/moe_gemm/moe_kernels.inl 1005 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/common.h 81 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/converter.h 60 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/cudaCoreGemm.cu 249 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/cudaCoreGemm.h 51 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/details.h 93 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/fp8Gemm.cu 148 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/fp8Gemm.h 44 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/int8SQ.cu 168 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/int8SQ.h 44 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/kernel.h 111 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/kernelDispatcher.h 123 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/kernelLauncher.h 75 maga_transformer/cpp/cutlass/cutlass_kernels/weightOnlyBatchedGemv/utility.h 274 maga_transformer/cpp/cutlass/cutlass_kernels/weight_only_quant_op.h 19 maga_transformer/cpp/cutlass/gen.py 1104 maga_transformer/cpp/cutlass/interface.h 11 maga_transformer/cpp/cutlass/moe.bzl 261 maga_transformer/cpp/dataclass/EngineInitParameter.cc 332 maga_transformer/cpp/dataclass/EngineInitParameter.h 132 maga_transformer/cpp/dataclass/EngineScheduleInfo.cc 15 maga_transformer/cpp/dataclass/EngineScheduleInfo.h 17 maga_transformer/cpp/dataclass/GenerateConfig.h 175 maga_transformer/cpp/dataclass/LoadBalance.cc 95 maga_transformer/cpp/dataclass/LoadBalance.h 107 maga_transformer/cpp/dataclass/MergedQuery.h 79 maga_transformer/cpp/dataclass/Query.cc 18 maga_transformer/cpp/dataclass/Query.h 149 maga_transformer/cpp/deep_gemm/DeepGemmPlugin.cpp 449 maga_transformer/cpp/deep_gemm/DeepGemmPlugin.h 15 maga_transformer/cpp/deep_gemm/deep_gemm_template.h 92 maga_transformer/cpp/deep_gemm/def.bzl 26 maga_transformer/cpp/deep_gemm/include/fp8_gemm.cuh 387 maga_transformer/cpp/deep_gemm/include/mma_utils.cuh 809 maga_transformer/cpp/deep_gemm/include/scheduler.cuh 87 maga_transformer/cpp/deep_gemm/include/tma_utils.cuh 84 maga_transformer/cpp/deep_gemm/include/utils.cuh 39 maga_transformer/cpp/deep_gemm/utils.h 8 maga_transformer/cpp/devices/BufferManager.cc 172 maga_transformer/cpp/devices/BufferManager.h 69 maga_transformer/cpp/devices/CommonDefines.h 11 maga_transformer/cpp/devices/DeviceBase.cc 406 maga_transformer/cpp/devices/DeviceBase.h 97 maga_transformer/cpp/devices/DeviceData.h 92 maga_transformer/cpp/devices/DeviceExport.cc 11 maga_transformer/cpp/devices/DeviceExport.h 45 maga_transformer/cpp/devices/DeviceFactory.cc 197 maga_transformer/cpp/devices/DeviceFactory.h 46 maga_transformer/cpp/devices/DeviceOps.cc 204 maga_transformer/cpp/devices/DeviceOps.h 78 maga_transformer/cpp/devices/LoraWeights.h 188 maga_transformer/cpp/devices/OpData.cc 116 maga_transformer/cpp/devices/OpData.h 838 maga_transformer/cpp/devices/Weights.h 114 maga_transformer/cpp/devices/arm_impl/ArmActOp.cc 126 maga_transformer/cpp/devices/arm_impl/ArmAttentionOp.cc 610 maga_transformer/cpp/devices/arm_impl/ArmDevice.cc 132 maga_transformer/cpp/devices/arm_impl/ArmDevice.h 67 maga_transformer/cpp/devices/arm_impl/ArmEmbeddingLookup.cc 89 maga_transformer/cpp/devices/arm_impl/ArmGemmKaiOp.cc 385 maga_transformer/cpp/devices/arm_impl/ArmGemmOp.cc 106 maga_transformer/cpp/devices/arm_impl/ArmGemmOptOp.cc 145 maga_transformer/cpp/devices/arm_impl/ArmLayerNormOp.cc 1337 maga_transformer/cpp/devices/arm_impl/ArmSampleOp.cc 319 maga_transformer/cpp/devices/arm_impl/ArmSoftmaxOp.cc 331 maga_transformer/cpp/devices/arm_impl/ArmWeights.cc 44 maga_transformer/cpp/devices/arm_impl/gemm_opt/ArmGemmKernel.h 200 maga_transformer/cpp/devices/arm_impl/gemm_opt/ArmGemmPacking.cc 767 maga_transformer/cpp/devices/arm_impl/gemm_opt/ArmGemmThreadblock.cc 875 maga_transformer/cpp/devices/arm_impl/gemm_opt/activation_const.hpp 33 maga_transformer/cpp/devices/arm_impl/gemm_opt/activation_macro.h 235 maga_transformer/cpp/devices/arm_impl/gemm_opt/arm_common.h 265 maga_transformer/cpp/devices/arm_impl/gemm_opt/gemm_microkernel_macro_m8_bf16.h 1173 maga_transformer/cpp/devices/arm_impl/type_bf16/bfloat16_cmath_impl.hpp 92 maga_transformer/cpp/devices/arm_impl/type_bf16/bfloat16_impl.hpp 229 maga_transformer/cpp/devices/arm_impl/type_bf16/hie_bfloat16.hpp 330 maga_transformer/cpp/devices/arm_impl/type_bf16/hie_bfloat16_cmath.hpp 78 maga_transformer/cpp/devices/base_impl/AttentionLayer.cc 194 maga_transformer/cpp/devices/base_impl/FfnLayer.cc 308 maga_transformer/cpp/devices/base_impl/GroupGemm.cc 27 maga_transformer/cpp/devices/base_impl/LoraLinear.cc 186 maga_transformer/cpp/devices/base_impl/LoraLinearWithActivation.cc 11 maga_transformer/cpp/devices/base_impl/MhaQKVGemm.cc 49 maga_transformer/cpp/devices/base_impl/MlaAttentionLayer.cc 150 maga_transformer/cpp/devices/base_tests/ActOpTest.hpp 91 maga_transformer/cpp/devices/base_tests/AttentionLayerTest.hpp 118 maga_transformer/cpp/devices/base_tests/AttentionOpTest.hpp 243 maga_transformer/cpp/devices/base_tests/BasicDeviceTest.cc 20 maga_transformer/cpp/devices/base_tests/BeamSearchOpTest.hpp 88 maga_transformer/cpp/devices/base_tests/DistributedTest.cc 86 maga_transformer/cpp/devices/base_tests/FfnLayerTest.hpp 506 maga_transformer/cpp/devices/base_tests/GemmOpTest.hpp 236 maga_transformer/cpp/devices/base_tests/GeneralOpsTest.hpp 266 maga_transformer/cpp/devices/base_tests/GroupGemmOpTest.hpp 86 maga_transformer/cpp/devices/base_tests/LayerNormTest.hpp 217 maga_transformer/cpp/devices/base_tests/LoraLinearLayerTest.hpp 155 maga_transformer/cpp/devices/base_tests/SoftmaxOpTest.hpp 69 maga_transformer/cpp/devices/cpu_impl/CpuDevice.cc 273 maga_transformer/cpp/devices/cpu_impl/CpuDevice.h 32 maga_transformer/cpp/devices/cpu_impl/CpuSampleOp.cc 432 maga_transformer/cpp/devices/cuda_impl/CudaActOp.cc 102 maga_transformer/cpp/devices/cuda_impl/CudaAddBiasOp.cc 24 maga_transformer/cpp/devices/cuda_impl/CudaAttentionOp.cc 312 maga_transformer/cpp/devices/cuda_impl/CudaBeamSearchOp.cc 71 maga_transformer/cpp/devices/cuda_impl/CudaDeepEPFfnLayer.cc 261 maga_transformer/cpp/devices/cuda_impl/CudaDeepEPLLFfnLayer.cc 104 maga_transformer/cpp/devices/cuda_impl/CudaDevice.cc 619 maga_transformer/cpp/devices/cuda_impl/CudaDevice.h 246 maga_transformer/cpp/devices/cuda_impl/CudaDeviceRegister.cc 5 maga_transformer/cpp/devices/cuda_impl/CudaEmbeddingLookup.cc 36 maga_transformer/cpp/devices/cuda_impl/CudaFP8Moe.cc 313 maga_transformer/cpp/devices/cuda_impl/CudaFfnLayer.cc 393 maga_transformer/cpp/devices/cuda_impl/CudaFlashInfer.cc 469 maga_transformer/cpp/devices/cuda_impl/CudaFlashInfer.h 77 maga_transformer/cpp/devices/cuda_impl/CudaGemmOp.cc 340 maga_transformer/cpp/devices/cuda_impl/CudaGroupGemmOp.cc 62 maga_transformer/cpp/devices/cuda_impl/CudaLayernorm.cc 287 maga_transformer/cpp/devices/cuda_impl/CudaLoraLinear.cc 243 maga_transformer/cpp/devices/cuda_impl/CudaLoraLinearWithActOp.cc 28 maga_transformer/cpp/devices/cuda_impl/CudaMlaAttentionOp.cc 261 maga_transformer/cpp/devices/cuda_impl/CudaMlaContextAttention.cc 7 maga_transformer/cpp/devices/cuda_impl/CudaMlaQKVGemm.cc 10 maga_transformer/cpp/devices/cuda_impl/CudaNvtxOp.cc 10 maga_transformer/cpp/devices/cuda_impl/CudaOps.cc 571 maga_transformer/cpp/devices/cuda_impl/CudaPrefillAttention.cc 269 maga_transformer/cpp/devices/cuda_impl/CudaQuantizeOp.cc 175 maga_transformer/cpp/devices/cuda_impl/CudaSampleOp.cc 418 maga_transformer/cpp/devices/cuda_impl/CudaSoftmaxOp.cc 89 maga_transformer/cpp/devices/cuda_impl/CudaWeights.cc 59 maga_transformer/cpp/devices/cuda_impl/DeepEPBuffer.cc 603 maga_transformer/cpp/devices/cuda_impl/DeepEPBuffer.h 130 maga_transformer/cpp/devices/cuda_impl/DeepEPDefs.h 252 maga_transformer/cpp/devices/device_defs.bzl 50 maga_transformer/cpp/devices/rocm_impl/ROCmActOp.cc 86 maga_transformer/cpp/devices/rocm_impl/ROCmAllocator.h 40 maga_transformer/cpp/devices/rocm_impl/ROCmAttentionOp.cc 385 maga_transformer/cpp/devices/rocm_impl/ROCmDevice.cc 372 maga_transformer/cpp/devices/rocm_impl/ROCmDevice.h 101 maga_transformer/cpp/devices/rocm_impl/ROCmDistributedOp.cc 93 maga_transformer/cpp/devices/rocm_impl/ROCmFfnLayer.cc 166 maga_transformer/cpp/devices/rocm_impl/ROCmGemmOp.cc 288 maga_transformer/cpp/devices/rocm_impl/ROCmLayernorm.cc 265 maga_transformer/cpp/devices/rocm_impl/ROCmLoraLinearWithActOP.cc 132 maga_transformer/cpp/devices/rocm_impl/ROCmOps.cc 54 maga_transformer/cpp/devices/rocm_impl/ROCmQuantizeOp.cc 91 maga_transformer/cpp/devices/rocm_impl/ROCmSampleOp.cc 266 maga_transformer/cpp/devices/rocm_impl/ROCmSoftmaxOp.cc 70 maga_transformer/cpp/devices/rocm_impl/ROCmWeights.cc 117 maga_transformer/cpp/devices/rocm_impl/RocmTestUtils.h 7 maga_transformer/cpp/devices/rocm_impl/custom_ar_comm.cc 229 maga_transformer/cpp/devices/rocm_impl/custom_ar_comm.h 47 maga_transformer/cpp/devices/rocm_impl/torch_hip_allocator.cc 50 maga_transformer/cpp/devices/rocm_impl/torch_hip_allocator.h 99 maga_transformer/cpp/devices/testing/TestBase.cc 5 maga_transformer/cpp/devices/testing/TestBase.h 354 maga_transformer/cpp/devices/torch_impl/BeamSearchOp.h 80 maga_transformer/cpp/devices/torch_impl/FfnLayer.h 204 maga_transformer/cpp/devices/torch_impl/GptModel.hpp 157 maga_transformer/cpp/devices/utils/DebugUtils.cc 338 maga_transformer/cpp/devices/utils/DebugUtils.h 34 maga_transformer/cpp/devices/utils/DevicePerfWrapper.h 30 maga_transformer/cpp/devices/utils/Timer.h 95 maga_transformer/cpp/disaggregate/cache_store/CacheLoadServiceClosure.cpp 90 maga_transformer/cpp/disaggregate/cache_store/CacheLoadServiceClosure.h 46 maga_transformer/cpp/disaggregate/cache_store/CacheStore.h 36 maga_transformer/cpp/disaggregate/cache_store/CacheStoreServiceImpl.cpp 82 maga_transformer/cpp/disaggregate/cache_store/CacheStoreServiceImpl.h 46 maga_transformer/cpp/disaggregate/cache_store/CacheStoreServiceImplContext.cpp 161 maga_transformer/cpp/disaggregate/cache_store/CacheStoreServiceImplContext.h 49 maga_transformer/cpp/disaggregate/cache_store/CommonDefine.h 53 maga_transformer/cpp/disaggregate/cache_store/Impl.cpp 28 maga_transformer/cpp/disaggregate/cache_store/InitParams.h 18 maga_transformer/cpp/disaggregate/cache_store/Interface.h 18 maga_transformer/cpp/disaggregate/cache_store/LoadContext.cpp 147 maga_transformer/cpp/disaggregate/cache_store/LoadContext.h 65 maga_transformer/cpp/disaggregate/cache_store/MemoryUtil.h 15 maga_transformer/cpp/disaggregate/cache_store/MessagerClient.cpp 126 maga_transformer/cpp/disaggregate/cache_store/MessagerClient.h 44 maga_transformer/cpp/disaggregate/cache_store/MessagerServer.cpp 73 maga_transformer/cpp/disaggregate/cache_store/MessagerServer.h 31 maga_transformer/cpp/disaggregate/cache_store/NoRdmaMemoryUtilImpl.cpp 21 maga_transformer/cpp/disaggregate/cache_store/NoRdmaMemoryUtilImpl.h 12 maga_transformer/cpp/disaggregate/cache_store/NormalCacheStore.cpp 199 maga_transformer/cpp/disaggregate/cache_store/NormalCacheStore.h 68 maga_transformer/cpp/disaggregate/cache_store/RequestBlockBuffer.cpp 110 maga_transformer/cpp/disaggregate/cache_store/RequestBlockBuffer.h 54 maga_transformer/cpp/disaggregate/cache_store/RequestBlockBufferStore.cpp 161 maga_transformer/cpp/disaggregate/cache_store/RequestBlockBufferStore.h 34 maga_transformer/cpp/disaggregate/cache_store/Timer.cpp 25 maga_transformer/cpp/disaggregate/cache_store/Timer.h 21 maga_transformer/cpp/disaggregate/cache_store/TimerManager.cpp 34 maga_transformer/cpp/disaggregate/cache_store/TimerManager.h 20 maga_transformer/cpp/disaggregate/cache_store/metrics/CacheStoreMetricsCollector.cpp 211 maga_transformer/cpp/disaggregate/cache_store/metrics/CacheStoreMetricsCollector.h 119 maga_transformer/cpp/disaggregate/cache_store/metrics/CacheStoreMetricsReporter.cpp 141 maga_transformer/cpp/disaggregate/cache_store/metrics/CacheStoreMetricsReporter.h 48 maga_transformer/cpp/disaggregate/cache_store/proto/cache_store_service.proto 51 maga_transformer/cpp/disaggregate/load_balancer/BaseLoadBalancer.cpp 53 maga_transformer/cpp/disaggregate/load_balancer/BaseLoadBalancer.h 34 maga_transformer/cpp/disaggregate/load_balancer/HeartbeatSynchronizer.cpp 114 maga_transformer/cpp/disaggregate/load_balancer/HeartbeatSynchronizer.h 74 maga_transformer/cpp/disaggregate/load_balancer/RRLoadBalancer.cpp 32 maga_transformer/cpp/disaggregate/load_balancer/RRLoadBalancer.h 12 maga_transformer/cpp/disaggregate/load_balancer/WRRLoadBalancer.cpp 116 maga_transformer/cpp/disaggregate/load_balancer/WRRLoadBalancer.h 21 maga_transformer/cpp/disaggregate/load_balancer/WorkerAwaredLoadBalancer.cpp 56 maga_transformer/cpp/disaggregate/load_balancer/WorkerAwaredLoadBalancer.h 25 maga_transformer/cpp/disaggregate/load_balancer/subscribe/LocalSubscribeService.cpp 27 maga_transformer/cpp/disaggregate/load_balancer/subscribe/LocalSubscribeService.h 18 maga_transformer/cpp/disaggregate/load_balancer/subscribe/NacosSubscribeService.cpp 43 maga_transformer/cpp/disaggregate/load_balancer/subscribe/NacosSubscribeService.h 18 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeService.h 10 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceConfig.cpp 80 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceConfig.h 60 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceCreator.cpp 25 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceCreator.h 7 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceManager.cpp 60 maga_transformer/cpp/disaggregate/load_balancer/subscribe/SubscribeServiceManager.h 17 maga_transformer/cpp/disaggregate/load_balancer/subscribe/TopoNode.cpp 9 maga_transformer/cpp/disaggregate/load_balancer/subscribe/TopoNode.h 15 maga_transformer/cpp/disaggregate/rtpllm_master/cluster/PrefillLoadBalancer.cpp 160 maga_transformer/cpp/disaggregate/rtpllm_master/cluster/PrefillLoadBalancer.h 36 maga_transformer/cpp/disaggregate/rtpllm_master/cluster/PrefillWorkerInfo.h 120 maga_transformer/cpp/disaggregate/rtpllm_master/common/TaskDescription.h 19 maga_transformer/cpp/disaggregate/rtpllm_master/common/UserRequest.h 13 maga_transformer/cpp/disaggregate/rtpllm_master/entry/Init.cpp 10 maga_transformer/cpp/disaggregate/rtpllm_master/entry/MasterHttpServer.cpp 102 maga_transformer/cpp/disaggregate/rtpllm_master/entry/MasterHttpServer.h 49 maga_transformer/cpp/disaggregate/rtpllm_master/entry/MasterInitParameter.cpp 34 maga_transformer/cpp/disaggregate/rtpllm_master/entry/MasterInitParameter.h 40 maga_transformer/cpp/disaggregate/rtpllm_master/entry/RandomRequestIdGenerator.h 27 maga_transformer/cpp/disaggregate/rtpllm_master/entry/Response.h 64 maga_transformer/cpp/disaggregate/rtpllm_master/entry/RtpLLMMasterEntry.cpp 92 maga_transformer/cpp/disaggregate/rtpllm_master/entry/RtpLLMMasterEntry.h 28 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/EstimatorConfig.h 24 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/LookupMapImpl.cpp 187 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/LookupMapImpl.h 81 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/LookupPrefillEstimator.cpp 25 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/LookupPrefillEstimator.h 19 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/PrefillTimeEstimator.cpp 24 maga_transformer/cpp/disaggregate/rtpllm_master/estimator/PrefillTimeEstimator.h 22 maga_transformer/cpp/disaggregate/rtpllm_master/tokenize/RemoteTokenizeModule.cpp 69 maga_transformer/cpp/disaggregate/rtpllm_master/tokenize/RemoteTokenizeModule.h 17 maga_transformer/cpp/embedding_engine/EmbeddingEngine.cc 82 maga_transformer/cpp/embedding_engine/EmbeddingEngine.h 40 maga_transformer/cpp/embedding_engine/EmbeddingExecutor.cc 232 maga_transformer/cpp/embedding_engine/EmbeddingExecutor.h 35 maga_transformer/cpp/embedding_engine/EmbeddingQuery.cc 33 maga_transformer/cpp/embedding_engine/EmbeddingQuery.h 64 maga_transformer/cpp/embedding_engine/EmbeddingQueryConverter.cc 37 maga_transformer/cpp/embedding_engine/EmbeddingQueryConverter.h 15 maga_transformer/cpp/embedding_engine/EmbeddingScheduler.cc 64 maga_transformer/cpp/embedding_engine/EmbeddingScheduler.h 25 maga_transformer/cpp/embedding_engine/EmbeddingStream.cc 79 maga_transformer/cpp/embedding_engine/EmbeddingStream.h 54 maga_transformer/cpp/embedding_engine/arpc/ArpcServerWrapper.cc 26 maga_transformer/cpp/embedding_engine/arpc/ArpcServerWrapper.h 15 maga_transformer/cpp/embedding_engine/arpc/ArpcServiceCreator.cc 15 maga_transformer/cpp/embedding_engine/arpc/ArpcServiceCreator.h 15 maga_transformer/cpp/embedding_engine/grpc/AllEmbeddingRpcServiceImpl.cc 29 maga_transformer/cpp/embedding_engine/grpc/AllEmbeddingRpcServiceImpl.h 22 maga_transformer/cpp/embedding_engine/handlers/HandlerBase.h 32 maga_transformer/cpp/embedding_engine/handlers/LinearSoftmaxHandler.cc 58 maga_transformer/cpp/embedding_engine/handlers/LinearSoftmaxHandler.h 21 maga_transformer/cpp/engine_base/EngineBase.cc 35 maga_transformer/cpp/engine_base/EngineBase.h 61 maga_transformer/cpp/engine_base/Executor.h 84 maga_transformer/cpp/eplb/ExpertBalancer.cc 273 maga_transformer/cpp/eplb/ExpertBalancer.h 109 maga_transformer/cpp/eplb/ExpertBalancerPythonWrapper.cc 37 maga_transformer/cpp/eplb/ExpertBalancerPythonWrapper.h 45 maga_transformer/cpp/http_server/http_client/ConnectionPool.cpp 110 maga_transformer/cpp/http_server/http_client/ConnectionPool.h 30 maga_transformer/cpp/http_server/http_client/HandleHttpPacket.cpp 50 maga_transformer/cpp/http_server/http_client/HandleHttpPacket.h 26 maga_transformer/cpp/http_server/http_client/SimpleHttpClient.cpp 89 maga_transformer/cpp/http_server/http_client/SimpleHttpClient.h 44 maga_transformer/cpp/http_server/http_server/ANetApp.cpp 39 maga_transformer/cpp/http_server/http_server/ANetApp.h 26 maga_transformer/cpp/http_server/http_server/HttpError.h 16 maga_transformer/cpp/http_server/http_server/HttpRequest.cpp 34 maga_transformer/cpp/http_server/http_server/HttpRequest.h 31 maga_transformer/cpp/http_server/http_server/HttpRequestWorkItem.cpp 17 maga_transformer/cpp/http_server/http_server/HttpRequestWorkItem.h 24 maga_transformer/cpp/http_server/http_server/HttpResponse.cpp 23 maga_transformer/cpp/http_server/http_server/HttpResponse.h 32 maga_transformer/cpp/http_server/http_server/HttpResponseWriter.cpp 127 maga_transformer/cpp/http_server/http_server/HttpResponseWriter.h 44 maga_transformer/cpp/http_server/http_server/HttpRouter.cpp 45 maga_transformer/cpp/http_server/http_server/HttpRouter.h 26 maga_transformer/cpp/http_server/http_server/HttpServer.cpp 43 maga_transformer/cpp/http_server/http_server/HttpServer.h 37 maga_transformer/cpp/http_server/http_server/HttpServerAdapter.cpp 116 maga_transformer/cpp/http_server/http_server/HttpServerAdapter.h 28 maga_transformer/cpp/kernels/_add.h 135 maga_transformer/cpp/kernels/_cast_to_int8.h 54 maga_transformer/cpp/kernels/_convert_from_float.h 71 maga_transformer/cpp/kernels/_convert_from_fp8.h 96 maga_transformer/cpp/kernels/_convert_to_float.h 75 maga_transformer/cpp/kernels/_convert_to_fp8.h 63 maga_transformer/cpp/kernels/_fma.h 525 maga_transformer/cpp/kernels/_logn_attention.h 99 maga_transformer/cpp/kernels/_mul.h 562 maga_transformer/cpp/kernels/_sum_dot_zero.h 80 maga_transformer/cpp/kernels/_vector_abs_max.h 38 maga_transformer/cpp/kernels/activation_fp8_kernels.cu 235 maga_transformer/cpp/kernels/activation_fp8_kernels.h 23 maga_transformer/cpp/kernels/activation_kernels.cu 711 maga_transformer/cpp/kernels/activation_kernels.h 88 maga_transformer/cpp/kernels/add_residual_kernels.cu 507 maga_transformer/cpp/kernels/add_residual_kernels.h 84 maga_transformer/cpp/kernels/alpha_layernorm_kernels.cu 766 maga_transformer/cpp/kernels/alpha_layernorm_kernels.h 47 maga_transformer/cpp/kernels/banRepeatNgram.cu 164 maga_transformer/cpp/kernels/banRepeatNgram.h 20 maga_transformer/cpp/kernels/ban_bad_words.cu 148 maga_transformer/cpp/kernels/ban_bad_words.h 19 maga_transformer/cpp/kernels/comm_buffer.cu 105 maga_transformer/cpp/kernels/comm_buffer.h 45 maga_transformer/cpp/kernels/custom_ar_kernels.cu 387 maga_transformer/cpp/kernels/custom_ar_kernels.h 56 maga_transformer/cpp/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention.cu 286 maga_transformer/cpp/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention.h 149 maga_transformer/cpp/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_launch.h 228 maga_transformer/cpp/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.h 1663 maga_transformer/cpp/kernels/decoder_masked_multihead_attention_utils.h 2737 maga_transformer/cpp/kernels/eplb/experts_stats_kernels.cu 126 maga_transformer/cpp/kernels/eplb/experts_stats_kernels.h 24 maga_transformer/cpp/kernels/gen_relative_pos_bias.cu 274 maga_transformer/cpp/kernels/gen_relative_pos_bias.h 37 maga_transformer/cpp/kernels/gpt_kernels.cu 1633 maga_transformer/cpp/kernels/gpt_kernels.h 252 maga_transformer/cpp/kernels/hello_world.cu 29 maga_transformer/cpp/kernels/hello_world.h 11 maga_transformer/cpp/kernels/int8_utils.cuh 47 maga_transformer/cpp/kernels/kv_cache/kv_cache_index.h 36 maga_transformer/cpp/kernels/kv_cache/kv_cache_utils.h 148 maga_transformer/cpp/kernels/l1norm_kernels.cu 92 maga_transformer/cpp/kernels/l1norm_kernels.h 10 maga_transformer/cpp/kernels/layernorm_fp8_kernels.cu 998 maga_transformer/cpp/kernels/layernorm_fp8_kernels.h 109 maga_transformer/cpp/kernels/layernorm_kernels.cu 652 maga_transformer/cpp/kernels/layernorm_kernels.h 48 maga_transformer/cpp/kernels/logprob_kernels.cu 189 maga_transformer/cpp/kernels/logprob_kernels.h 16 maga_transformer/cpp/kernels/mla_kernels/mla_merge_transpose_kernel.cu 169 maga_transformer/cpp/kernels/mla_kernels/mla_merge_transpose_kernel.h 29 maga_transformer/cpp/kernels/moe_topKSoftmax_kernels.cu 798 maga_transformer/cpp/kernels/moe_topKSoftmax_kernels.h 71 maga_transformer/cpp/kernels/no_aux_tc_kernels.cu 646 maga_transformer/cpp/kernels/no_aux_tc_kernels.h 9 maga_transformer/cpp/kernels/penalty_types.h 23 maga_transformer/cpp/kernels/quantization_tensor.cu 188 maga_transformer/cpp/kernels/quantization_tensor.h 15 maga_transformer/cpp/kernels/quantize_weight.cu 141 maga_transformer/cpp/kernels/quantize_weight.h 15 maga_transformer/cpp/kernels/rmsnormKernels.cu 443 maga_transformer/cpp/kernels/rmsnormKernels.h 34 maga_transformer/cpp/kernels/rocm/layernorm_kernels.cu 1124 maga_transformer/cpp/kernels/rocm/layernorm_kernels.h 63 maga_transformer/cpp/kernels/rocm/quantization_rocm.cu 514 maga_transformer/cpp/kernels/rocm/quantization_rocm.h 59 maga_transformer/cpp/kernels/rotary_position_embedding.h 720 maga_transformer/cpp/kernels/sampling_penalty_kernels.cu 607 maga_transformer/cpp/kernels/sampling_penalty_kernels.h 74 maga_transformer/cpp/kernels/sampling_topk_kernels.cu 575 maga_transformer/cpp/kernels/sampling_topk_kernels.h 74 maga_transformer/cpp/kernels/sampling_topp_kernels.cu 1458 maga_transformer/cpp/kernels/sampling_topp_kernels.h 138 maga_transformer/cpp/kernels/stop_criteria_kernels.cu 138 maga_transformer/cpp/kernels/stop_criteria_kernels.h 22 maga_transformer/cpp/kernels/triton/aot_triton_kernel.bzl 252 maga_transformer/cpp/kernels/triton/aot_triton_kernel_compiler.py 105 maga_transformer/cpp/kernels/triton/aot_triton_kernels_linker.py 38 maga_transformer/cpp/kernels/triton/layernorm_kernels.cu 82 maga_transformer/cpp/kernels/triton/layernorm_kernels.h 15 maga_transformer/cpp/kernels/triton/layernorm_kernels.py 51 maga_transformer/cpp/kernels/unfused_attention_fp8_kernels.cu 1012 maga_transformer/cpp/kernels/unfused_attention_fp8_kernels.h 135 maga_transformer/cpp/kernels/unfused_attention_kernels.cu 2425 maga_transformer/cpp/kernels/unfused_attention_kernels.h 178 maga_transformer/cpp/kernels/vec_dtypes.cuh 1229 maga_transformer/cpp/lora/LoraManager.cc 129 maga_transformer/cpp/lora/LoraManager.h 47 maga_transformer/cpp/metrics/KmonParam.cc 70 maga_transformer/cpp/metrics/KmonParam.h 32 maga_transformer/cpp/metrics/RtpLLMMetrics.cc 360 maga_transformer/cpp/metrics/RtpLLMMetrics.h 394 maga_transformer/cpp/model_rpc/DecodeGenerateContext.cc 74 maga_transformer/cpp/model_rpc/DecodeGenerateContext.h 63 maga_transformer/cpp/model_rpc/DecodeRpcServer.cc 638 maga_transformer/cpp/model_rpc/DecodeRpcServer.h 57 maga_transformer/cpp/model_rpc/GenerateContext.cc 47 maga_transformer/cpp/model_rpc/GenerateContext.h 97 maga_transformer/cpp/model_rpc/LocalRpcServer.cc 137 maga_transformer/cpp/model_rpc/LocalRpcServer.h 65 maga_transformer/cpp/model_rpc/LocalRpcServiceImpl.h 61 maga_transformer/cpp/model_rpc/PrefillGenerateContext.cc 156 maga_transformer/cpp/model_rpc/PrefillGenerateContext.h 91 maga_transformer/cpp/model_rpc/PrefillRpcServer.cc 408 maga_transformer/cpp/model_rpc/PrefillRpcServer.h 42 maga_transformer/cpp/model_rpc/PrefillRpcServerRuntimeMeta.h 57 maga_transformer/cpp/model_rpc/QueryConverter.cc 226 maga_transformer/cpp/model_rpc/QueryConverter.h 24 maga_transformer/cpp/model_rpc/RPCPool.h 68 maga_transformer/cpp/model_rpc/RemoteRpcServer.cc 83 maga_transformer/cpp/model_rpc/RemoteRpcServer.h 26 maga_transformer/cpp/model_rpc/RemoteRpcServiceImpl.cc 19 maga_transformer/cpp/model_rpc/RemoteRpcServiceImpl.h 69 maga_transformer/cpp/model_rpc/RemoteServerResource.h 16 maga_transformer/cpp/model_rpc/model_rpc_client.py 188 maga_transformer/cpp/models/BaseLogitsProcessor.cc 15 maga_transformer/cpp/models/BaseLogitsProcessor.h 19 maga_transformer/cpp/models/GptModel.cc 1224 maga_transformer/cpp/models/GptModel.h 211 maga_transformer/cpp/models/MTPModel.cc 46 maga_transformer/cpp/models/MTPModel.h 14 maga_transformer/cpp/models/SampleInfos.h 72 maga_transformer/cpp/models/Sampler.cc 164 maga_transformer/cpp/models/Sampler.h 22 maga_transformer/cpp/models/ThinkModeLogitsProcessor.cc 55 maga_transformer/cpp/models/ThinkModeLogitsProcessor.h 23 maga_transformer/cpp/models_weight/W.h 143 maga_transformer/cpp/multimodal_processor/LocalMultimodalProcessor.h 60 maga_transformer/cpp/multimodal_processor/MultimodalProcessor.cc 164 maga_transformer/cpp/multimodal_processor/MultimodalProcessor.h 39 maga_transformer/cpp/multimodal_processor/RemoteMultimodalProcessor.h 97 maga_transformer/cpp/normal_engine/NormalBatchStreamProcessor.cc 415 maga_transformer/cpp/normal_engine/NormalBatchStreamProcessor.h 55 maga_transformer/cpp/normal_engine/NormalEngine.cc 277 maga_transformer/cpp/normal_engine/NormalEngine.h 58 maga_transformer/cpp/normal_engine/NormalExecutor.cc 179 maga_transformer/cpp/normal_engine/NormalExecutor.h 41 maga_transformer/cpp/normal_engine/NormalGenerateStream.cc 150 maga_transformer/cpp/normal_engine/NormalGenerateStream.h 31 maga_transformer/cpp/openai/ApiDataType.cc 139 maga_transformer/cpp/openai/ApiDataType.h 119 maga_transformer/cpp/openai/ChatRender.cc 171 maga_transformer/cpp/openai/ChatRender.h 66 maga_transformer/cpp/openai/OpenaiEndpoint.cc 120 maga_transformer/cpp/openai/OpenaiEndpoint.h 30 maga_transformer/cpp/position_ids_generator/PositionIdsGenerator.cc 102 maga_transformer/cpp/position_ids_generator/PositionIdsGenerator.h 28 maga_transformer/cpp/proto/create_grpc_proto.py 11 maga_transformer/cpp/proto/embedding/all_embedding_rpc_service.proto 15 maga_transformer/cpp/proto/model_rpc_service.proto 197 maga_transformer/cpp/rocm/amd_bfloat16.h 121 maga_transformer/cpp/rocm/cuda_shims.h 94 maga_transformer/cpp/rocm/hip_utils.cc 87 maga_transformer/cpp/rocm/hip_utils.h 233 maga_transformer/cpp/rocm/hipblasAlgoMap.cc 252 maga_transformer/cpp/rocm/hipblasAlgoMap.h 92 maga_transformer/cpp/rocm/hipblasMMWrapper.cc 366 maga_transformer/cpp/rocm/hipblasMMWrapper.h 104 maga_transformer/cpp/rocm/int4_gemm_kernels/0_int4_dequant_gemm_256x128x128x128_32_32x32_2x2_16x16x1_4x64x1_32_1x32x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/10_int4_dequant_gemm_128x128x16x128_16_16x16_4x1_16x8x1_8x16x1_16_1x16x1x8_2_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/11_int4_dequant_gemm_128x64x32x128_32_32x32_1x1_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/12_int4_dequant_gemm_128x64x16x128_16_16x16_2x1_16x8x1_8x16x1_16_1x16x1x8_2_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/13_int4_dequant_gemm_128x32x16x128_16_16x16_1x1_16x8x1_8x16x1_16_1x16x1x8_2_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/16_int4_dequant_gemm_128x16x32x128_32_16x16_1x1_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/17_int4_dequant_gemm_128x16x64x128_32_16x16_1x2_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/18_int4_dequant_gemm_128x32x64x128_32_32x32_1x1_16x8x1_4x32x1_32_1x16x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/19_int4_dequant_gemm_128x16x128x128_32_16x16_1x4_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/1_int4_dequant_gemm_256x128x128x64_32_32x32_2x2_8x32x1_2x128x1_32_1x32x1x8_8_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/20_int4_dequant_gemm_128x32x128x128_32_32x32_1x2_16x8x1_4x32x1_32_1x16x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/21_int4_dequant_gemm_256x16x256x128_32_16x16_1x4_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/22_int4_dequant_gemm_256x32x256x128_32_32x32_1x2_16x16x1_4x64x1_32_1x16x1x16_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/23_int4_dequant_gemm_128x64x32x128_32_32x32_1x1_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/24_int4_dequant_gemm_128x64x16x128_16_16x16_2x1_16x8x1_8x16x1_16_1x16x1x8_2_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/25_int4_dequant_gemm_128x32x16x128_16_16x16_1x1_16x8x1_8x16x1_16_1x16x1x8_2_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/26_int4_dequant_gemm_64x16x16x128_16_16x16_1x1_16x4x1_8x8x1_16_1x16x1x4_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/28_int4_dequant_gemm_128x16x32x128_32_16x16_1x1_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/29_int4_dequant_gemm_128x16x64x128_32_16x16_1x2_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/30_int4_dequant_gemm_128x32x64x128_32_32x32_1x1_16x8x1_4x32x1_32_1x16x1x8_8_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/31_int4_dequant_gemm_128x16x128x128_32_16x16_1x4_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/32_int4_dequant_gemm_128x32x128x128_32_32x32_1x2_16x8x1_4x32x1_32_1x16x1x8_8_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/33_int4_dequant_gemm_256x16x256x128_32_16x16_1x4_16x8x1_4x32x1_32_1x16x1x16_4_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/34_int4_dequant_gemm_256x32x256x128_32_32x32_1x2_16x16x1_4x64x1_32_1x16x1x16_8_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/35_int4_dequant_gemm_256x128x128x64_32_32x32_2x2_8x32x1_2x128x1_32_1x32x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/36_int4_dequant_gemm_256x128x128x64_32_32x32_4x1_8x32x1_2x128x1_32_1x32x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/37_int4_dequant_gemm_256x16x64x256_32_16x16_1x1_32x8x1_8x32x1_32_1x16x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/3_int4_dequant_gemm_256x128x128x64_32_32x32_2x2_8x32x1_2x128x1_16_1x32x1x8_8_intrawave_v4.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/4_int4_dequant_gemm_256x128x128x64_32_32x32_2x2_8x32x1_2x128x1_16_1x32x1x8_8_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/5_int4_dequant_gemm_128x32x16x128_16_16x16_1x1_8x16x1_8x16x1_16_1x16x1x8_2_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/6_int4_dequant_gemm_64x16x16x128_16_16x16_1x1_16x4x1_8x8x1_16_1x16x1x4_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/7_int4_dequant_gemm_64x16x16x128_16_16x16_1x1_8x8x1_8x8x1_16_1x16x1x4_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/8_int4_dequant_gemm_128x16x32x128_32_16x16_1x1_8x16x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/9_int4_dequant_gemm_128x128x32x128_32_32x32_2x1_16x8x1_4x32x1_32_1x16x1x8_4_intrawave_v3.cc 24 maga_transformer/cpp/rocm/int4_gemm_kernels/int4_dequant_comm.h 174 maga_transformer/cpp/rocm/int4_gemm_kernels/int4_dequant_kernel_manifest.h 40 maga_transformer/cpp/rocm/quantizePreprocessors.cc 491 maga_transformer/cpp/rocm/quantizePreprocessors.h 30 maga_transformer/cpp/rocm/rocmCKGemmWrapper.cc 134 maga_transformer/cpp/rocm/rocmCKGemmWrapper.h 25 maga_transformer/cpp/rocm/rocmFmhaWrapper.cc 228 maga_transformer/cpp/rocm/rocmFmhaWrapper.h 45 maga_transformer/cpp/rocm/rocmMoeWrapper.cc 106 maga_transformer/cpp/rocm/rocmMoeWrapper.h 40 maga_transformer/cpp/schedulers/BatchDecodeScheduler.h 133 maga_transformer/cpp/schedulers/FIFOScheduler.cc 259 maga_transformer/cpp/schedulers/FIFOScheduler.h 59 maga_transformer/cpp/schedulers/SchedulerBase.h 23 maga_transformer/cpp/speculative_engine/SpeculativeEngine.cc 576 maga_transformer/cpp/speculative_engine/SpeculativeEngine.h 87 maga_transformer/cpp/speculative_engine/SpeculativeOnlineAdaptor.h 13 maga_transformer/cpp/speculative_engine/SpeculativeScheduler.cc 24 maga_transformer/cpp/speculative_engine/SpeculativeScheduler.h 17 maga_transformer/cpp/speculative_engine/SpeculativeStreamOutput.h 85 maga_transformer/cpp/speculative_engine/propose_executor/DeterministicExecutor.cc 139 maga_transformer/cpp/speculative_engine/propose_executor/DeterministicExecutor.h 41 maga_transformer/cpp/speculative_engine/propose_executor/MTPBatchStreamProcessor.cc 51 maga_transformer/cpp/speculative_engine/propose_executor/MTPBatchStreamProcessor.h 11 maga_transformer/cpp/speculative_engine/propose_executor/MTPExecutor.cc 39 maga_transformer/cpp/speculative_engine/propose_executor/MTPExecutor.h 64 maga_transformer/cpp/speculative_engine/propose_executor/MTPStream.h 110 maga_transformer/cpp/speculative_engine/propose_executor/ProposeDynamicConfig.h 7 maga_transformer/cpp/speculative_engine/propose_executor/ProposeExecutor.cc 29 maga_transformer/cpp/speculative_engine/propose_executor/ProposeExecutor.h 31 maga_transformer/cpp/speculative_engine/propose_executor/ProposeOutput.h 28 maga_transformer/cpp/speculative_engine/propose_executor/VanillaExecutor.cc 45 maga_transformer/cpp/speculative_engine/propose_executor/VanillaExecutor.h 46 maga_transformer/cpp/speculative_engine/propose_executor/VanillaStream.h 74 maga_transformer/cpp/speculative_engine/score_executor/ScoreBatchStreamProcessor.cc 143 maga_transformer/cpp/speculative_engine/score_executor/ScoreBatchStreamProcessor.h 15 maga_transformer/cpp/speculative_engine/score_executor/ScoreExecutor.cc 63 maga_transformer/cpp/speculative_engine/score_executor/ScoreExecutor.h 44 maga_transformer/cpp/speculative_engine/score_executor/ScoreOutput.h 21 maga_transformer/cpp/speculative_engine/score_executor/ScoreStream.h 74 maga_transformer/cpp/speculative_engine/speculative_sampler/RejectionSampler.cc 151 maga_transformer/cpp/speculative_engine/speculative_sampler/RejectionSampler.h 19 maga_transformer/cpp/speculative_engine/speculative_sampler/SpeculativeSampler.cc 16 maga_transformer/cpp/speculative_engine/speculative_sampler/SpeculativeSampler.h 21 maga_transformer/cpp/speculative_engine/speculative_sampler/SpeculativeSamplerOutput.h 21 maga_transformer/cpp/speculative_engine/speculative_updater/SpeculativeUpdater.cc 46 maga_transformer/cpp/speculative_engine/speculative_updater/SpeculativeUpdater.h 47 maga_transformer/cpp/speculative_engine/speculative_updater/SpeculativeUpdaterConfig.cc 15 maga_transformer/cpp/speculative_engine/speculative_updater/SpeculativeUpdaterConfig.h 11 maga_transformer/cpp/stats/ExpertStats.h 41 maga_transformer/cpp/stream/CompleteTokenIds.cc 172 maga_transformer/cpp/stream/CompleteTokenIds.h 42 maga_transformer/cpp/stream/GenerateStream.cc 688 maga_transformer/cpp/stream/GenerateStream.h 289 maga_transformer/cpp/stream/StreamCacheResource.cc 235 maga_transformer/cpp/stream/StreamCacheResource.h 83 maga_transformer/cpp/stream/StreamGroups.h 172 maga_transformer/cpp/system_prompt/SystemPrompt.h 34 maga_transformer/cpp/system_prompt/SystemPromptConstructor.cc 40 maga_transformer/cpp/system_prompt/SystemPromptConstructor.h 19 maga_transformer/cpp/th_op/GptInitParameter.cc 378 maga_transformer/cpp/th_op/GptInitParameter.h 220 maga_transformer/cpp/th_op/GptInitParameterRegister.h 7 maga_transformer/cpp/th_op/common/CutlassConfigOps.cc 51 maga_transformer/cpp/th_op/common/InitEngineOps.cc 21 maga_transformer/cpp/th_op/common/InitEngineOps.h 7 maga_transformer/cpp/th_op/common/NcclOp.cc 44 maga_transformer/cpp/th_op/common/NcclOp.h 20 maga_transformer/cpp/th_op/init.cc 20 maga_transformer/cpp/th_op/multi_gpu_gpt/EmbeddingHandlerOp.cc 21 maga_transformer/cpp/th_op/multi_gpu_gpt/EmbeddingHandlerOp.h 32 maga_transformer/cpp/th_op/multi_gpu_gpt/RtpEmbeddingOp.cc 122 maga_transformer/cpp/th_op/multi_gpu_gpt/RtpEmbeddingOp.h 49 maga_transformer/cpp/th_op/multi_gpu_gpt/RtpLLMOp.cc 218 maga_transformer/cpp/th_op/multi_gpu_gpt/RtpLLMOp.h 52 maga_transformer/cpp/th_op/th_utils.h 56 maga_transformer/cpp/tokenizer/Tokenizer.cc 45 maga_transformer/cpp/tokenizer/Tokenizer.h 20 maga_transformer/cpp/trt_plugins/GroupGemmPlugin/GroupGemmPlugin.cpp 21 maga_transformer/cpp/trt_plugins/GroupGemmPlugin/GroupGemmPlugin.h 23 maga_transformer/cpp/trt_plugins/common/checkMacrosPlugin.cpp 13 maga_transformer/cpp/trt_plugins/common/checkMacrosPlugin.h 7 maga_transformer/cpp/trt_plugins/common/trtPluginsInterface.h 2 maga_transformer/cpp/trt_plugins/mixtureOfExperts/mixtureOfExpertsPlugin.cpp 130 maga_transformer/cpp/trt_plugins/mixtureOfExperts/mixtureOfExpertsPlugin.h 71 maga_transformer/cpp/trt_plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.cpp 77 maga_transformer/cpp/trt_plugins/smoothQuantGemmPlugin/smoothQuantGemmPlugin.h 36 maga_transformer/cpp/trt_plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.cpp 121 maga_transformer/cpp/trt_plugins/weightOnlyGroupwiseQuantMatmulPlugin/weightOnlyGroupwiseQuantMatmulPlugin.h 44 maga_transformer/cpp/trt_plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.cpp 99 maga_transformer/cpp/trt_plugins/weightOnlyQuantMatmulPlugin/weightOnlyQuantMatmulPlugin.h 59 maga_transformer/cpp/utils/AssertUtils.h 37 maga_transformer/cpp/utils/AtomicUtil.h 17 maga_transformer/cpp/utils/AttentionWeight.h 22 maga_transformer/cpp/utils/Cm2Config.h 16 maga_transformer/cpp/utils/DFAUtil.h 79 maga_transformer/cpp/utils/EnumUtils.h 39 maga_transformer/cpp/utils/EplbConfig.h 9 maga_transformer/cpp/utils/ErrorCode.h 211 maga_transformer/cpp/utils/Exception.cc 53 maga_transformer/cpp/utils/Exception.h 23 maga_transformer/cpp/utils/HashUtil.h 16 maga_transformer/cpp/utils/KVCacheUtils.h 10 maga_transformer/cpp/utils/LRUCache.h 82 maga_transformer/cpp/utils/LinearBiasUtil.h 52 maga_transformer/cpp/utils/Logger.cc 86 maga_transformer/cpp/utils/Logger.h 185 maga_transformer/cpp/utils/MlaConfig.h 10 maga_transformer/cpp/utils/NetUtil.h 21 maga_transformer/cpp/utils/PairUnorderedMap.h 31 maga_transformer/cpp/utils/PyUtils.cc 60 maga_transformer/cpp/utils/PyUtils.h 18 maga_transformer/cpp/utils/QuantInfo.h 71 maga_transformer/cpp/utils/RopeConfig.h 44 maga_transformer/cpp/utils/RpcErrorCode.h 85 maga_transformer/cpp/utils/ScopeGuard.h 21 maga_transformer/cpp/utils/ShapeCheck.cc 25 maga_transformer/cpp/utils/ShapeCheck.h 9 maga_transformer/cpp/utils/SignalUtils.cc 70 maga_transformer/cpp/utils/SignalUtils.h 8 maga_transformer/cpp/utils/StackTrace.cc 31 maga_transformer/cpp/utils/StackTrace.h 5 maga_transformer/cpp/utils/StatusUtil.h 46 maga_transformer/cpp/utils/StringUtil.h 91 maga_transformer/cpp/utils/TimeUtil.h 7 maga_transformer/cpp/utils/activation_types.h 42 maga_transformer/cpp/utils/compiler_config.h 23 maga_transformer/cpp/utils/layernorm_types.h 39 maga_transformer/cpp/utils/quantization.h 215 maga_transformer/cpp/utils/utils.h 62 maga_transformer/device/__init__.py 30 maga_transformer/device/device_base.py 40 maga_transformer/device/device_impl.py 305 maga_transformer/distribute/gang_info.py 137 maga_transformer/distribute/gang_server.py 267 maga_transformer/distribute/gang_test_util.py 40 maga_transformer/distribute/worker_info.py 253 maga_transformer/embedding/backend_embedding_app.py 39 maga_transformer/embedding/embedding_endpoint.py 55 maga_transformer/embedding/embedding_type.py 6 maga_transformer/embedding/frontend_embedding_app.py 28 maga_transformer/eplb/ep_balancer.py 182 maga_transformer/eplb/eplb.py 108 maga_transformer/kserve_server.py 44 maga_transformer/lora/__init__.py 1 maga_transformer/lora/lora_file.py 129 maga_transformer/lora/lora_manager.py 70 maga_transformer/lora/lora_weights.py 34 maga_transformer/metrics/__init__.py 5 maga_transformer/metrics/kmonitor_metric_reporter.py 48 maga_transformer/model_factory.py 213 maga_transformer/model_factory_register.py 82 maga_transformer/model_loader/__init__.py 10 maga_transformer/model_loader/attn_weight.py 46 maga_transformer/model_loader/ffn_weight.py 230 maga_transformer/model_loader/group_wise_quant_weight.py 256 maga_transformer/model_loader/load_config.py 125 maga_transformer/model_loader/loader.py 187 maga_transformer/model_loader/model_weight_info.py 394 maga_transformer/model_loader/omni_quant_weight.py 207 maga_transformer/model_loader/per_block_fp8_quant_weight.py 234 maga_transformer/model_loader/per_tensor_int8_quant_weight.py 221 maga_transformer/model_loader/smooth_quant_weight.py 342 maga_transformer/model_loader/static_fp8_quant_weight.py 262 maga_transformer/model_loader/w8a8_weight.py 65 maga_transformer/model_loader/weight_module.py 430 maga_transformer/model_loader/weight_only_quant_weight.py 70 maga_transformer/models/__init__.py 40 maga_transformer/models/base_model.py 467 maga_transformer/models/bert.py 94 maga_transformer/models/bert_weight.py 127 maga_transformer/models/bloom.py 120 maga_transformer/models/chat_glm_v2.py 92 maga_transformer/models/chat_glm_v3.py 24 maga_transformer/models/chat_glm_v4.py 18 maga_transformer/models/chat_glm_v4_vision.py 52 maga_transformer/models/chat_glm_v4_vision_weight.py 17 maga_transformer/models/cogvlm2.py 184 maga_transformer/models/cogvlm2_weight.py 187 maga_transformer/models/cosyvoice_qwen.py 22 maga_transformer/models/deepseek_dequant.py 18 maga_transformer/models/deepseek_v2.py 310 maga_transformer/models/downstream_modules/__init__.py 8 maga_transformer/models/downstream_modules/classifier/api_datatype.py 10 maga_transformer/models/downstream_modules/classifier/bert_classifier.py 55 maga_transformer/models/downstream_modules/classifier/classifier.py 45 maga_transformer/models/downstream_modules/classifier/roberta_classifier.py 33 maga_transformer/models/downstream_modules/classifier/util.py 11 maga_transformer/models/downstream_modules/common_input_generator.py 43 maga_transformer/models/downstream_modules/custom_module.py 48 maga_transformer/models/downstream_modules/embedding/all_embedding_module.py 56 maga_transformer/models/downstream_modules/embedding/api_datatype.py 76 maga_transformer/models/downstream_modules/embedding/bge_m3_embedding_module.py 73 maga_transformer/models/downstream_modules/embedding/colbert_embedding_module.py 53 maga_transformer/models/downstream_modules/embedding/dense_embedding_module.py 86 maga_transformer/models/downstream_modules/embedding/minicpmv_embedding_module.py 297 maga_transformer/models/downstream_modules/embedding/misc.py 90 maga_transformer/models/downstream_modules/embedding/sparse_emebdding_module.py 71 maga_transformer/models/downstream_modules/plugin_loader.py 22 maga_transformer/models/downstream_modules/reranker/api_datatype.py 19 maga_transformer/models/downstream_modules/reranker/reranker_module.py 52 maga_transformer/models/downstream_modules/utils.py 35 maga_transformer/models/eva2clip_vit.py 165 maga_transformer/models/falcon.py 77 maga_transformer/models/glm_v2_weight.py 59 maga_transformer/models/gpt_neox.py 115 maga_transformer/models/gpt_neox_weight.py 106 maga_transformer/models/gpt_util/prefix_encoder.py 28 maga_transformer/models/gpt_util/rms.py 27 maga_transformer/models/gpt_weight.py 40 maga_transformer/models/internvl.py 89 maga_transformer/models/internvl_vit.py 555 maga_transformer/models/internvl_weight.py 117 maga_transformer/models/jina_bert/jina_bert.py 27 maga_transformer/models/jina_bert/jina_bert_weight.py 115 maga_transformer/models/llama.py 186 maga_transformer/models/llama_weight.py 281 maga_transformer/models/llava.py 158 maga_transformer/models/llava_utils.py 104 maga_transformer/models/llava_vit.py 750 maga_transformer/models/llava_weight.py 18 maga_transformer/models/megatron_bert.py 28 maga_transformer/models/megatron_bert_weight.py 84 maga_transformer/models/minicpmv/minicpmv.py 234 maga_transformer/models/minicpmv/modeling_navit_siglip.py 566 maga_transformer/models/minicpmv/resampler.py 565 maga_transformer/models/minicpmv_embedding/minicpmv_embedding.py 289 maga_transformer/models/minicpmv_embedding/resampler.py 112 maga_transformer/models/mixtral.py 113 maga_transformer/models/mpt.py 57 maga_transformer/models/multimodal/multimodal_common.py 94 maga_transformer/models/multimodal/multimodal_mixin.py 202 maga_transformer/models/multimodal/multimodal_trt_engine.py 226 maga_transformer/models/phi.py 63 maga_transformer/models/propose_model/propose_model.py 6 maga_transformer/models/qwen.py 209 maga_transformer/models/qwen2_vl/activations.py 113 maga_transformer/models/qwen2_vl/image_processing_qwen2_vl.py 270 maga_transformer/models/qwen2_vl/modeling_qwen2_vl.py 271 maga_transformer/models/qwen2_vl/qwen2_vl.py 152 maga_transformer/models/qwen2_vl/qwen2_vl_vit.py 175 maga_transformer/models/qwen_v2.py 194 maga_transformer/models/qwen_v2_audio/configuration_qwen2_audio.py 79 maga_transformer/models/qwen_v2_audio/modeling_qwen2_audio.py 315 maga_transformer/models/qwen_v2_audio/processor.py 59 maga_transformer/models/qwen_v2_audio/qwen_v2_audio.py 50 maga_transformer/models/qwen_v2_moe.py 69 maga_transformer/models/qwen_v3_moe.py 30 maga_transformer/models/qwen_vl.py 132 maga_transformer/models/qwen_vl_vit.py 291 maga_transformer/models/qwen_vl_weight.py 14 maga_transformer/models/rotary_embedding/deepseek_rotary_embedding.py 169 maga_transformer/models/sgpt_bloom.py 56 maga_transformer/models/sgpt_bloom_vector.py 61 maga_transformer/models/starcoder.py 121 maga_transformer/models/starcoder2.py 153 maga_transformer/models/whisper.py 95 maga_transformer/models/whisper_weight.py 64 maga_transformer/openai/api_datatype.py 183 maga_transformer/openai/openai_endpoint.py 249 maga_transformer/openai/renderer_factory.py 63 maga_transformer/openai/renderer_factory_register.py 7 maga_transformer/openai/renderers/__init__.py 17 maga_transformer/openai/renderers/basic_renderer.py 130 maga_transformer/openai/renderers/chatglm4_renderer.py 88 maga_transformer/openai/renderers/cogvlm2_render.py 68 maga_transformer/openai/renderers/conversation.py 1228 maga_transformer/openai/renderers/custom_renderer.py 851 maga_transformer/openai/renderers/fast_chat_renderer.py 42 maga_transformer/openai/renderers/internvl_renderer.py 151 maga_transformer/openai/renderers/llama_template.py 733 maga_transformer/openai/renderers/llama_template_renderer.py 61 maga_transformer/openai/renderers/llava_renderer.py 150 maga_transformer/openai/renderers/minicpmv_renderer.py 84 maga_transformer/openai/renderers/qwen_agent/__init__.py 3 maga_transformer/openai/renderers/qwen_agent/llm/__init__.py 36 maga_transformer/openai/renderers/qwen_agent/llm/base.py 332 maga_transformer/openai/renderers/qwen_agent/llm/function_calling.py 358 maga_transformer/openai/renderers/qwen_agent/llm/oai.py 99 maga_transformer/openai/renderers/qwen_agent/llm/openvino.py 118 maga_transformer/openai/renderers/qwen_agent/llm/qwen_dashscope.py 197 maga_transformer/openai/renderers/qwen_agent/llm/qwenvl_dashscope.py 90 maga_transformer/openai/renderers/qwen_agent/llm/schema.py 92 maga_transformer/openai/renderers/qwen_agent/llm/text_base.py 19 maga_transformer/openai/renderers/qwen_agent/log.py 17 maga_transformer/openai/renderers/qwen_agent/settings.py 9 maga_transformer/openai/renderers/qwen_agent/utils/__init__.py 1 maga_transformer/openai/renderers/qwen_agent/utils/parallel_executor.py 26 maga_transformer/openai/renderers/qwen_agent/utils/str_processing.py 21 maga_transformer/openai/renderers/qwen_agent/utils/tokenization_qwen.py 153 maga_transformer/openai/renderers/qwen_agent/utils/tool_function_converter/__init__.py 3 maga_transformer/openai/renderers/qwen_agent/utils/tool_function_converter/request_converter.py 152 maga_transformer/openai/renderers/qwen_agent/utils/tool_function_converter/response_converter.py 58 maga_transformer/openai/renderers/qwen_agent/utils/utils.py 278 maga_transformer/openai/renderers/qwen_agent_renderer.py 222 maga_transformer/openai/renderers/qwen_agent_tool_renderer.py 65 maga_transformer/openai/renderers/qwen_renderer.py 453 maga_transformer/openai/renderers/qwen_v2_audio_renderer.py 75 maga_transformer/openai/renderers/qwen_vl_renderer.py 99 maga_transformer/ops/__init__.py 52 maga_transformer/ops/comm/nccl_op.py 25 maga_transformer/ops/comm/parallel_op.py 44 maga_transformer/ops/libth_transformer.pyi 407 maga_transformer/ops/rtp_llm/rtp_llm_op.py 38 maga_transformer/pipeline/__init__.py 1 maga_transformer/pipeline/chatapi_format.py 38 maga_transformer/pipeline/default_plugin.py 44 maga_transformer/pipeline/pipeline.py 256 maga_transformer/pipeline/pipeline_custom_func.py 35 maga_transformer/plugins/ret_hidden_states.py 5 maga_transformer/server/backend_app.py 196 maga_transformer/server/backend_server.py 207 maga_transformer/server/frontend_app.py 163 maga_transformer/server/frontend_server.py 253 maga_transformer/server/frontend_worker.py 250 maga_transformer/server/misc.py 66 maga_transformer/server/vit_rpc_server.py 51 maga_transformer/start_backend_server.py 137 maga_transformer/start_frontend_server.py 31 maga_transformer/start_server.py 121 maga_transformer/structure/request_extractor.py 169 maga_transformer/tokenizer/tokenization_chatglm.py 292 maga_transformer/tokenizer/tokenization_chatglm2.py 134 maga_transformer/tokenizer/tokenization_chatglm3.py 229 maga_transformer/tokenizer/tokenization_chatglm4.py 135 maga_transformer/tokenizer/tokenization_qwen.py 171 maga_transformer/tools/__init__.py 1 maga_transformer/tools/api/__init__.py 1 maga_transformer/tools/api/hf_model_helper.py 117 maga_transformer/tools/api/model_basic_info_analyzer.py 189 maga_transformer/tools/api/model_basic_info_analyzer_api.py 33 maga_transformer/tools/api/model_size_evaluator_api.py 74 maga_transformer/tools/api/utils.py 4 maga_transformer/tools/convert/weights_convert.py 232 maga_transformer/tools/fake_bloom.py 58 maga_transformer/tools/fake_glm_v2.py 63 maga_transformer/tools/fake_gpt_neox.py 62 maga_transformer/tools/fake_model_base.py 189 maga_transformer/tools/fake_qwen.py 73 maga_transformer/tools/fake_util.py 20 maga_transformer/tools/log_analyze.py 69 maga_transformer/tools/model_assistant_server.py 52 maga_transformer/tools/quant/__init__.py 10 maga_transformer/tools/quant/awq_quanter.py 31 maga_transformer/tools/quant/base_quanter.py 60 maga_transformer/tools/quant/datasets_adapter.py 82 maga_transformer/tools/quant/fp8_quanter.py 217 maga_transformer/tools/quant/gptq_quanter.py 32 maga_transformer/tools/quant/weights_quant.py 316 maga_transformer/utils/check_util.py 21 maga_transformer/utils/ckpt_file_info.py 155 maga_transformer/utils/complete_response_async_generator.py 32 maga_transformer/utils/concurrency_controller.py 53 maga_transformer/utils/database.py 126 maga_transformer/utils/dump_config_utils.py 25 maga_transformer/utils/export_utils.py 19 maga_transformer/utils/flash_attn_utils.py 6 maga_transformer/utils/ft_plugin.py 51 maga_transformer/utils/fuser.py 178 maga_transformer/utils/gemm_utils/__init__.py 6 maga_transformer/utils/gemm_utils/cutlass_config.py 46 maga_transformer/utils/gemm_utils/device_map.py 58 maga_transformer/utils/grpc_util.py 51 maga_transformer/utils/import_util.py 11 maga_transformer/utils/lru_dict.py 27 maga_transformer/utils/meta_pickler.py 234 maga_transformer/utils/mm_process_engine.py 52 maga_transformer/utils/model_weight.py 1001 maga_transformer/utils/multimodal_util.py 94 maga_transformer/utils/nccl_util.py 9 maga_transformer/utils/oss_util.py 15 maga_transformer/utils/smooth_quant_convert/llama/convert.py 191 maga_transformer/utils/smooth_quant_convert/llama/hf_llama_convert.py 290 maga_transformer/utils/smooth_quant_convert/llama/smoothquant.py 150 maga_transformer/utils/smooth_quant_convert/qwen/convert.py 207 maga_transformer/utils/smooth_quant_convert/qwen/hf_qwen_convert.py 304 maga_transformer/utils/smooth_quant_convert/qwen/smoothquant.py 158 maga_transformer/utils/smooth_quant_convert/qwen/utils.py 122 maga_transformer/utils/tensor_utils.py 9 maga_transformer/utils/thread_safe_deque.py 33 maga_transformer/utils/time_util.py 34 maga_transformer/utils/token_processor.py 110 maga_transformer/utils/tokenizer_utils.py 119 maga_transformer/utils/util.py 163 maga_transformer/utils/version_info.py 13 maga_transformer/utils/weight_type.py 37 maga_transformer/utils/word_util.py 84 open_source/bazel/arch_select.bzl 121 rtpllm_master_py/__init__.py 19 rtpllm_master_py/entry.py 65 rtpllm_master_py/stub/librtpllm_master.pyi 65 workspace.bzl 12