Path Lines of Code README.md 99 README_zh-CN.md 111 examples/baichuan/ds_config_TEMPLATE.json 45 examples/baichuan/ds_train_huggingface_baichuan.py 213 examples/baichuan/evaluate_huggingface_baichuan13b.py 80 examples/baichuan/evaluate_megatron_baichuan13b.py 99 examples/baichuan/finetune_huggingface_baichuan13b.py 41 examples/baichuan/finetune_megatron_baichuan13b.py 54 examples/baichuan/pretrain_megatron_baichuan13b.py 77 examples/baichuan/run_ds_train_huggingface_baichuan.sh 98 examples/baichuan/run_evaluate_huggingface_baichuan.sh 77 examples/baichuan/run_evaluate_megatron_baichuan.sh 140 examples/baichuan/run_finetune_huggingface_baichuan.sh 110 examples/baichuan/run_finetune_megatron_baichuan.sh 149 examples/baichuan/run_pretrain_megatron_baichuan.sh 175 examples/baichuan2/evaluate_huggingface_baichuan.py 81 examples/baichuan2/evaluate_megatron_baichuan.py 107 examples/baichuan2/generate_text_megatron_baichuan.py 27 examples/baichuan2/pretrain_megatron_baichuan.py 85 examples/baichuan2/run_evaluate_huggingface_baichuan.sh 76 examples/baichuan2/run_evaluate_megatron_baichuan.sh 140 examples/baichuan2/run_pretrain_megatron_baichuan.sh 179 examples/baichuan2/run_text_generation_megatron_baichuan.sh 99 examples/bloom/evaluate_huggingface_bloom.py 72 examples/bloom/evaluate_megatron_bloom.py 101 examples/bloom/finetune_megatron_bloom.py 50 examples/bloom/generate_text_bloom.py 19 examples/bloom/pretrain_megatron_bloom.py 74 examples/bloom/run_evaluate_huggingface_bloom.sh 79 examples/bloom/run_evaluate_megatron_bloom.sh 84 examples/bloom/run_finetune_megatron_bloom.sh 147 examples/bloom/run_pretrain_megatron_bloom.sh 155 examples/bloom/run_text_generation_megatron_bloom.sh 104 examples/chatglm/finetune_huggingface_chatglm.py 33 examples/chatglm/finetune_megatron_chatglm.py 41 examples/chatglm/pretrain_megatron_chatglm.py 48 examples/chatglm/run_finetune_huggingface_chatglm.sh 104 examples/chatglm/run_finetune_megatron_chatglm.sh 144 examples/chatglm/run_pretrain_megatron_chatglm.sh 157 examples/codellama/ds_config_TEMPLATE.json 45 examples/codellama/ds_train_huggingface_llama.py 386 examples/codellama/evaluate_huggingface_llama.py 82 examples/codellama/evaluate_megatron_llama.py 106 examples/codellama/finetune_megatron_llama.py 61 examples/codellama/generate_text_megatron_llama.py 27 examples/codellama/pretrain_megatron_llama.py 85 examples/codellama/run_ds_train_huggingface_llama.sh 105 examples/codellama/run_evaluate_huggingface_llama.sh 81 examples/codellama/run_evaluate_megatron_llama.sh 148 examples/codellama/run_finetune_megatron_llama.sh 180 examples/codellama/run_finetune_megatron_llama_withGA.sh 189 examples/codellama/run_pretrain_megatron_llama.sh 185 examples/codellama/run_text_generation_megatron_llama.sh 105 examples/deepseek/run_evaluate_megatron_deepseek.sh 142 examples/deepseek/run_finetune_megatron_deepseek.sh 181 examples/deepseek/run_finetune_megatron_deepseek_withGA.sh 190 examples/deepseek/run_pretrain_megatron_deepseek.sh 186 examples/deepseek/run_text_generation_megatron_deepseek.sh 106 examples/deepseek_v2/README.md 258 examples/deepseek_v2/pretrain_deepseek.py 55 examples/deepseek_v2/run_mcore_deepseek.sh 340 examples/deepseek_v3/README.md 254 examples/deepseek_v3/pretrain_deepseek.py 97 examples/deepseek_v3/run_mcore_deepseek.sh 325 examples/deepspeed/README.md 67 examples/deepspeed/ds_config_TEMPLATE.json 45 examples/deepspeed/ds_train_huggingface_finetune.py 394 examples/deepspeed/run_ds_train_huggingface_finetune.sh 251 examples/deepspeed/text_generation_huggingface.py 37 examples/deepspeed/text_generation_vllm.py 50 examples/falcon/evaluate_huggingface_falcon.py 78 examples/falcon/evaluate_megatron_falcon.py 94 examples/falcon/evaluate_megatron_falcon40b.py 94 examples/falcon/finetune_huggingface_falcon.py 40 examples/falcon/finetune_megatron_falcon.py 51 examples/falcon/finetune_megatron_falcon40b.py 51 examples/falcon/generate_text_megatron_falcon.py 19 examples/falcon/generate_text_megatron_falcon40b.py 19 examples/falcon/pretrain_megatron_falcon.py 71 examples/falcon/pretrain_megatron_falcon40b.py 71 examples/falcon/run_evaluate_huggingface_falcon.sh 75 examples/falcon/run_evaluate_megatron_falcon.sh 81 examples/falcon/run_evaluate_megatron_falcon40b.sh 83 examples/falcon/run_finetune_huggingface_falcon.sh 104 examples/falcon/run_finetune_megatron_falcon.sh 144 examples/falcon/run_finetune_megatron_falcon40b.sh 146 examples/falcon/run_pretrain_megatron_falcon.sh 154 examples/falcon/run_pretrain_megatron_falcon40b.sh 156 examples/falcon/run_text_generation_megatron_falcon.sh 85 examples/falcon/run_text_generation_megatron_falcon40b.sh 87 examples/galactica/evaluate_huggingface_galactica.py 79 examples/galactica/evaluate_megatron_galactica.py 94 examples/galactica/finetune_huggingface_galactica.py 40 examples/galactica/finetune_megatron_galactica.py 51 examples/galactica/pretrain_megatron_galactica.py 71 examples/galactica/run_evaluate_huggingface_galactica.sh 71 examples/galactica/run_evaluate_megatron_galactica.sh 74 examples/galactica/run_finetune_huggingface_galactica.sh 104 examples/galactica/run_finetune_megatron_galactica.sh 138 examples/galactica/run_pretrain_megatron_galactica.sh 155 examples/glm130b/evaluate_megatron_glm130b.py 146 examples/glm130b/evaluate_sat_glm130b.py 143 examples/glm130b/pretrain_megatron_glm130b.py 62 examples/glm130b/run_evaluate_megatron_glm130b.sh 116 examples/glm130b/run_evaluate_sat_glm130b.sh 116 examples/glm130b/run_pretrain_megatron_glm130b.sh 156 examples/gpt3/pretrain_megatron_gpt3.py 90 examples/gpt3/run_pretrain_megatron_gpt3.sh 166 examples/gpt3/run_pretrain_megatron_gpt3_enwiki.sh 169 examples/llama/evaluate_huggingface_llama.py 80 examples/llama/evaluate_megatron_llama.py 94 examples/llama/finetune_huggingface_llama.py 41 examples/llama/finetune_megatron_llama.py 52 examples/llama/generate_text_megatron_llama.py 19 examples/llama/pretrain_megatron_llama.py 72 examples/llama/run_evaluate_huggingface_llama.sh 77 examples/llama/run_evaluate_megatron_llama.sh 86 examples/llama/run_finetune_huggingface_llama.sh 110 examples/llama/run_finetune_megatron_llama.sh 147 examples/llama/run_pretrain_megatron_llama.sh 162 examples/llama/run_text_generation_megatron_llama.sh 92 examples/llama2/README.md 347 examples/llama2/ds_config_TEMPLATE.json 45 examples/llama2/ds_train_huggingface_llama.py 386 examples/llama2/evaluate_huggingface_llama.py 81 examples/llama2/evaluate_huggingface_llama_moe.py 96 examples/llama2/evaluate_mcore_llama.py 122 examples/llama2/evaluate_megatron_llama.py 107 examples/llama2/finetune_mcore_llama_withGA.py 93 examples/llama2/finetune_megatron_llama.py 60 examples/llama2/finetune_megatron_llama_withGA.py 88 examples/llama2/generate_text_megatron_llama.py 27 examples/llama2/pretrain_mcore_llama.py 134 examples/llama2/pretrain_megatron_llama.py 85 examples/llama2/run_ds_train_huggingface_llama.sh 110 examples/llama2/run_evaluate_huggingface_llama.sh 81 examples/llama2/run_evaluate_mcore_llama.sh 149 examples/llama2/run_evaluate_megatron_llama.sh 146 examples/llama2/run_finetune_mcore_llama_withGA.sh 204 examples/llama2/run_finetune_megatron_llama.sh 179 examples/llama2/run_finetune_megatron_llama_withGA.sh 200 examples/llama2/run_mcore_llama2_70b.sh 273 examples/llama2/run_pretrain_mcore_llama.sh 204 examples/llama2/run_pretrain_megatron_llama.sh 187 examples/llama2/run_text_generation_megatron_llama.sh 104 examples/llama3/README.md 350 examples/llama3/entry.sh 5 examples/llama3/mpi_run_pretrain_llama3.sh 271 examples/llama3/pretrain_llama.py 158 examples/llama3/pretrain_llama_mcore070.py 155 examples/llama3/run_finetune_mcore_llama_withGA.sh 196 examples/llama3/run_finetune_megatron_llama_withGA.sh 180 examples/llama3/run_pretrain_llama_70b.sh 271 examples/llama3/run_pretrain_mcore_llama.sh 193 examples/llama3/run_pretrain_megatron_llama.sh 180 examples/llama3_1/README.md 233 examples/llama3_1/pretrain_llama.py 61 examples/llama3_1/run_mcore_llama3_1.sh 281 examples/llava/finetune_megatron_llava.py 76 examples/llava/pretrain_megatron_llava.py 84 examples/llava/run_finetune_megatron_llava.sh 189 examples/llava/run_pretrain_megatron_llava.sh 193 examples/llava_mcore/README.md 137 examples/llava_mcore/dataset_helpers.py 345 examples/llava_mcore/image_processing.py 89 examples/llava_mcore/pretrain_llava.py 355 examples/llava_mcore/run_mcore_llava.sh 223 examples/mistral/README.md 245 examples/mistral/pretrain_mcore_mistral.py 60 examples/mistral/run_mcore_mistral.sh 307 examples/moonlight/README.md 248 examples/moonlight/run_mcore_moonlight.sh 326 examples/qwen/ds_config_TEMPLATE.json 45 examples/qwen/ds_train_huggingface_qwen.py 210 examples/qwen/evaluate_huggingface_qwen.py 81 examples/qwen/evaluate_megatron_qwen.py 107 examples/qwen/evaluate_megatron_qwen_moe.py 124 examples/qwen/finetune_megatron_qwen.py 60 examples/qwen/finetune_megatron_qwen_withGA.py 75 examples/qwen/generate_text_megatron_qwen.py 26 examples/qwen/lm_evaluate_megatron_qwen.py 41 examples/qwen/pretrain_megatron_qwen.py 85 examples/qwen/pretrain_megatron_qwen_upcycled.py 109 examples/qwen/run_ds_train_huggingface_qwen.sh 98 examples/qwen/run_evaluate_huggingface_qwen.sh 82 examples/qwen/run_evaluate_megatron_qwen.sh 153 examples/qwen/run_evaluate_megatron_qwen_moe.sh 169 examples/qwen/run_finetune_megatron_qwen.sh 180 examples/qwen/run_finetune_megatron_qwen_withGA.sh 189 examples/qwen/run_lm_evaluate_huggingface_qwen.sh 11 examples/qwen/run_lm_evaluate_megatron_qwen.sh 156 examples/qwen/run_pretrain_megatron_qwen.sh 191 examples/qwen/run_pretrain_megatron_qwen_upcycled.sh 207 examples/qwen/run_text_generation_megatron_qwen.sh 95 examples/qwen1_5/README.md 609 examples/qwen1_5/evaluate_mcore_qwen.py 112 examples/qwen1_5/pretrain_mcore_qwen.py 158 examples/qwen1_5/pretrain_megablocks_qwen.py 106 examples/qwen1_5/run_evaluate_mcore_qwen.sh 163 examples/qwen1_5/run_evaluate_megatron_qwen.sh 167 examples/qwen1_5/run_finetune_mcore_qwen_withGA.sh 261 examples/qwen1_5/run_finetune_megablocks_qwen_withGA.sh 218 examples/qwen1_5/run_finetune_megatron_qwen.sh 199 examples/qwen1_5/run_finetune_megatron_qwen_withGA.sh 207 examples/qwen1_5/run_pretrain_mcore_qwen.sh 258 examples/qwen1_5/run_pretrain_megablocks_qwen.sh 217 examples/qwen1_5/run_pretrain_megatron_qwen.sh 204 examples/qwen1_5/run_text_generation_megatron_qwen.sh 124 examples/qwen2/README.md 249 examples/qwen2/README_moe.md 239 examples/qwen2/pretrain_qwen.py 60 examples/qwen2/pretrain_qwen2_moe.py 64 examples/qwen2/run_mcore_qwen.sh 354 examples/qwen2/run_mcore_qwen2_moe.sh 310 examples/qwen2_5/README.md 230 examples/qwen2_5/run_mcore_qwen.sh 362 examples/qwen2_5_vl/README.md 182 examples/qwen2_5_vl/pretrain_qwen.py 431 examples/qwen2_5_vl/run_mcore_qwen.sh 282 examples/qwen2_vl/README.md 181 examples/qwen2_vl/pretrain_qwen.py 410 examples/qwen2_vl/run_mcore_qwen.sh 265 examples/qwen3/README.md 230 examples/qwen3/pretrain_qwen.py 97 examples/qwen3/run_mcore_qwen3.sh 439 examples/qwen_vl/finetune_megatron_qwen_vl.py 62 examples/qwen_vl/run_finetune_megatron_qwen_vl.sh 187 examples/qwq/README.md 229 examples/qwq/run_mcore_qwen.sh 278 examples/starcoder/ds_config_TEMPLATE.json 45 examples/starcoder/ds_train_huggingface_starcoder.py 216 examples/starcoder/evaluate_megatron_starcoder.py 102 examples/starcoder/finetune_megatron_starcoder.py 53 examples/starcoder/generate_text_megatron_starcoder.py 23 examples/starcoder/pretrain_megatron_starcoder.py 78 examples/starcoder/run_ds_train_huggingface_starcoder.sh 108 examples/starcoder/run_evaluate_megatron_starcoder.sh 89 examples/starcoder/run_finetune_megatron_starcoder_wgbs.sh 164 examples/starcoder/run_finetune_megatron_starcoder_wogbs.sh 154 examples/starcoder/run_pretrain_megatron_starcoder.sh 165 examples/starcoder/run_text_generation_megatron_starcoder.sh 99 examples/yi/run_evaluate_megatron_yi.sh 136 examples/yi/run_finetune_megatron_yi.sh 169 examples/yi/run_pretrain_megatron_yi.sh 173 megatron_patch/fixes/optimizer_offloading/README.md 45 megatron_patch/fixes/optimizer_offloading/fix_optimizer_offloading.patch 25 megatron_patch/fixes/yarn_args/README.md 10 megatron_patch/fixes/yarn_args/fix_yarn_args.patch 37 megatron_patch/generation/megatron.md 56 megatron_patch/template/README.md 2 rlhf/README.md 85 rlhf/deepspeed-chat/README.md 22 rlhf/trlx/README.md 36 rlhf/trlx/ds_config_bloom.json 37 rlhf/trlx/ds_config_trlx_gptj_summarize.json 23 toolkits/distributed_checkpoints_convertor/README.md 58 toolkits/model_checkpoints_convertor/README.md 11 toolkits/model_checkpoints_convertor/llama/hf_llama_moe/config_TEMPLATE.json 32 toolkits/model_checkpoints_convertor/mistral/hf_mistral_moe/config_TEMPLATE.json 28 toolkits/multimodal_data_preprocessing/dataset_preparation.md 91 toolkits/pretrain_data_preprocessing/README.md 122 toolkits/sft_data_preprocessing/README.md 92