Path Lines of Code megatron_patch/data/dataset_helpers.py 381 megatron_patch/data/json_sft.py 106 megatron_patch/initialize.py 91 megatron_patch/lm_evaluate.py 139 megatron_patch/model/baichuan/language_model.py 515 megatron_patch/model/baichuan/transformer.py 1179 megatron_patch/model/baichuan2/language_model.py 450 megatron_patch/model/baichuan2/transformer.py 1292 megatron_patch/model/deepseek_v2/model.py 181 megatron_patch/model/deepseek_v2/moe/experts.py 676 megatron_patch/model/deepseek_v2/transformer_block.py 377 megatron_patch/model/deepseek_v2/transformer_layer.py 226 megatron_patch/model/falcon/language_model.py 491 megatron_patch/model/falcon/transformer.py 845 megatron_patch/model/falcon40b/language_model.py 491 megatron_patch/model/galactica/language_model.py 501 megatron_patch/model/glm130b/language_model.py 434 megatron_patch/model/llama/language_model.py 501 megatron_patch/model/llama2/language_model.py 454 megatron_patch/model/llama2/transformer.py 1296 megatron_patch/model/llama3/language_model.py 438 megatron_patch/model/llama3/model.py 144 megatron_patch/model/llama3/transformer_legacy.py 1252 megatron_patch/model/llama3_1/model.py 201 megatron_patch/model/llava/language_model.py 507 megatron_patch/model/llava/transformer.py 1292 megatron_patch/model/llava_mcore/llava_model.py 424 megatron_patch/model/llava_mcore/vision/clip_vit_model.py 130 megatron_patch/model/mistral/language_model.py 466 megatron_patch/model/mistral/transformer.py 1292 megatron_patch/model/mixtral/model.py 177 megatron_patch/model/mixtral/moe/experts.py 676 megatron_patch/model/mixtral/transformer_config.py 285 megatron_patch/model/mixtral_bak/model.py 162 megatron_patch/model/mixtral_bak/moe/token_dispatcher.py 172 megatron_patch/model/qwen/language_model.py 440 megatron_patch/model/qwen/transformer.py 1243 megatron_patch/model/qwen1_5/model.py 144 megatron_patch/model/qwen1_5/moe/token_dispatcher.py 282 megatron_patch/model/qwen1_5_megablocks/language_model.py 453 megatron_patch/model/qwen2/model.py 146 megatron_patch/model/qwen2/moe/router.py 206 megatron_patch/model/qwen2/transformer_block.py 323 megatron_patch/model/qwen2/transformer_layer.py 147 megatron_patch/model/qwen2_5_vl/transformer_block.py 434 megatron_patch/model/qwen2_5_vl/visionmodel.py 224 megatron_patch/model/qwen2_vl/visionmodel.py 160 megatron_patch/model/qwen3_moe/moe/moe_utils.py 79 megatron_patch/model/qwen_vl/language_model.py 481 megatron_patch/model/qwen_vl/transformer.py 1292 megatron_patch/model/starcoder/transformer.py 848 megatron_patch/template/helper.py 115 megatron_patch/training.py 612 rlhf/deepspeed-chat/utils.py 204 toolkits/auto_configurator/report_theoretical_memory.py 147 toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py 578 toolkits/model_checkpoints_convertor/glm/checkpoint_reshaping_and_interoperability.py 378 toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py 555