path # lines of code # active days days since first update days since last update # commits # contributors first updated last updated first contributor last contributor toolkits/multimodal_data_preprocessing/convert_llava_pretrain_to_wds.py 25 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/multimodal_data_preprocessing/build_llava_frame_dataset.py 123 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/multimodal_data_preprocessing/replace_llava_image_key.py 29 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/multimodal_data_preprocessing/convert_custom_dataset_to_wds_chatml.py 98 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/pretrain_data_preprocessing/preprocess_data.py 198 6 619 161 6 3 2023-09-04 2024-12-05 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/pretrain_data_preprocessing/clean_raw_text.py 69 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/pretrain_data_preprocessing/convert_json_to_list.py 10 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/pretrain_data_preprocessing/qwen_hf_preprocess_datasets.py 86 1 473 473 1 1 2024-01-28 2024-01-28 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/pretrain_data_preprocessing/preprocess_wudao2.py 76 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/pretrain_data_preprocessing/preprocess_data_megatron.py 360 10 395 16 10 4 2024-04-15 2025-04-29 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/general/synchronizer.py 119 3 30 3 3 1 2025-04-15 2025-05-12 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/general/__init__.py 6 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/general/h2m_synchronizer.py 259 2 30 3 2 1 2025-04-15 2025-05-12 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/general/m2h_synchronizer.py 499 2 30 3 2 1 2025-04-15 2025-05-12 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/convert.py 80 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/deepseek_v3/__init__.py 6 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/deepseek_v3/patch.py 25 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/deepseek_v3/h2m_synchronizer.py 46 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/distributed_checkpoints_convertor/impl/deepseek_v3/m2h_synchronizer.py 42 1 30 30 1 1 2025-04-15 2025-04-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/sft_data_preprocessing/sample_stats.py 23 2 295 42 2 1 2024-07-24 2025-04-03 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/sft_data_preprocessing/build_idxmap_sft_dataset.py 319 5 245 92 5 1 2024-09-12 2025-02-12 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/auto_configurator/report_theoretical_memory.py 147 1 197 197 1 1 2024-10-30 2024-10-30 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/auto_configurator/report_auto_config.py 124 1 197 197 1 1 2024-10-30 2024-10-30 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/falcon40b/configuration_RW.py 50 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/falcon40b/checkpoint_reshaping_and_interoperability.py 583 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/glm130b/merge_130b_ckpts.py 96 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/glm130b/checkpoint_reshaping_and_interoperability.py 363 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/chatglm/checkpoint_reshaping_and_interoperability.py 396 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/llava/hf2mcore_llava.py 669 1 175 175 2 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/utils/__init__.py 146 6 118 42 6 2 2025-01-17 2025-04-03 46404040+lostkevin@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/mistral/hf2mcore_mixtral.py 672 4 388 83 4 2 2024-04-22 2025-02-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/mistral/hf2mcore.py 468 5 389 83 6 2 2024-04-21 2025-02-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan/configuration_baichuan.py 43 1 608 608 1 1 2023-09-15 2023-09-15 38210876+lwmlyy@users.noreply.github.com 38210876+lwmlyy@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan/hf2te.py 378 5 619 468 5 2 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan/checkpoint_reshaping_and_interoperability.py 649 8 619 468 8 2 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe.py 454 13 353 71 13 3 2024-05-27 2025-03-05 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py 578 13 83 42 14 2 2025-02-21 2025-04-03 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/deepseek/fp8_cast_bf16.py 88 1 79 79 1 1 2025-02-25 2025-02-25 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron_ori.py 149 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/bloom/checkpoint_reshaping_and_interoperability.py 572 3 619 468 3 2 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron.py 149 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/bloom/reward_model_to_megatron.py 573 3 619 468 3 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/galactica/checkpoint_reshaping_and_interoperability.py 454 4 619 468 4 2 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/llama/hf_llama_moe/llama_moe.py 19 2 444 383 3 2 2024-02-26 2024-04-27 1208266117@qq.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/llama/hf2megatron.py 808 4 389 206 5 2 2024-04-21 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/llama/hf2mcore_llama3_1.py 710 3 265 83 3 2 2024-08-23 2025-02-21 46404040+lostkevin@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/llama/hf2mcore.py 674 3 389 206 4 2 2024-04-21 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/llama/hf2mcore_70b.py 577 4 388 206 4 3 2024-04-22 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan2/configuration_baichuan.py 43 1 604 604 1 1 2023-09-19 2023-09-19 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan2/hf2te.py 360 5 574 468 5 2 2023-10-19 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/baichuan2/checkpoint_reshaping_and_interoperability.py 638 11 604 395 14 4 2023-09-19 2024-04-15 38210876+lwmlyy@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/falcon/configuration_RW.py 101 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/falcon/checkpoint_reshaping_and_interoperability.py 612 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/yi/checkpoint_reshaping_and_interoperability.py 468 3 544 468 3 1 2023-11-18 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py 821 12 330 83 15 5 2024-06-19 2025-02-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2.5_vl.py 607 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_gqa.py 593 2 367 206 2 2 2024-05-13 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha.py 280 3 367 206 3 3 2024-05-13 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py 555 3 51 16 3 3 2025-03-25 2025-04-29 wanqian5@tal.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.0.py 725 2 389 206 2 2 2024-04-21 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe.py 479 5 367 206 5 3 2024-05-13 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_vl.py 616 9 169 57 9 2 2024-11-27 2025-03-19 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.5.py 810 4 389 206 4 4 2024-04-21 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5.py 546 2 389 206 2 2 2024-04-21 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha_to_moe.py 227 3 367 206 3 3 2024-05-13 2024-10-21 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com toolkits/model_checkpoints_convertor/glm/checkpoint_reshaping_and_interoperability.py 378 2 619 468 2 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com toolkits/model_checkpoints_convertor/starcoder/checkpoint_reshaping_and_interoperability.py 583 3 619 468 3 2 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com rlhf/deepspeed-chat/rm_main.py 319 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com rlhf/deepspeed-chat/utils.py 204 1 616 616 1 1 2023-09-07 2023-09-07 bdshichen@sina.cn bdshichen@sina.cn rlhf/trlx/reward_model_bloom.py 81 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com rlhf/trlx/train_reward_model_bloom.py 151 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com rlhf/trlx/trlx_bloom_rlhf.py 178 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/generation/gpt_predictor.py 74 2 619 473 2 1 2023-09-04 2024-01-28 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/generation/tokenization.py 76 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/generation/generation.py 317 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/generation/api.py 170 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/data/__init__.py 88 12 552 78 12 3 2023-11-10 2025-02-26 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/data/energon/chatml.py 46 1 139 139 1 1 2024-12-27 2024-12-27 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/data/json_sft.py 106 1 83 83 1 1 2025-02-21 2025-02-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/data/image_processing.py 67 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/data/utils.py 318 20 473 45 20 3 2024-01-28 2025-03-31 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/data/dataset_helpers.py 381 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/layer_specs.py 95 2 169 83 2 2 2024-11-27 2025-02-21 46404040+lostkevin@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2_vl/visionmodel.py 160 2 169 139 2 1 2024-11-27 2024-12-27 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/gpt_model.py 143 2 169 120 2 1 2024-11-27 2025-01-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/attention.py 530 2 169 83 2 2 2024-11-27 2025-02-21 46404040+lostkevin@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2_vl/language_module.py 104 1 169 169 1 1 2024-11-27 2024-11-27 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/language_model_embedding.py 98 1 120 120 1 1 2025-01-15 2025-01-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/attention_vision.py 529 2 169 83 2 2 2024-11-27 2025-02-21 46404040+lostkevin@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2_vl/rotary_pos_embedding.py 145 2 169 120 2 1 2024-11-27 2025-01-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/transformer_config.py 53 3 169 118 3 1 2024-11-27 2025-01-17 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/rope_utils.py 108 2 169 139 2 1 2024-11-27 2024-12-27 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_vl/model.py 191 3 169 120 3 1 2024-11-27 2025-01-15 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/falcon40b/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon40b/gpt_model.py 94 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon40b/language_model.py 491 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon40b/transformer.py 683 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2_5_vl/visionmodel.py 224 2 55 35 2 2 2025-03-21 2025-04-10 46404040+lostkevin@users.noreply.github.com wzuck.wang@gmail.com megatron_patch/model/qwen2_5_vl/transformer_block.py 434 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_5_vl/transformer_config.py 55 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2_5_vl/model.py 191 1 55 55 1 1 2025-03-21 2025-03-21 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/mixtral/layer_specs.py 129 4 473 148 4 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/__init__.py 1 2 505 473 2 1 2023-12-27 2024-01-28 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/transformer/attention.py 517 3 468 148 3 1 2024-02-02 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/transformer/mlp.py 193 3 468 148 3 1 2024-02-02 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/transformer_config.py 285 4 473 148 4 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/moe/__init__.py 1 1 473 473 1 1 2024-01-28 2024-01-28 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/moe/token_dispatcher.py 303 3 473 148 3 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/moe/moe_layer.py 113 4 473 148 4 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/moe/experts.py 676 4 473 148 4 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/moe/router.py 171 3 473 148 3 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral/model.py 177 3 473 148 3 1 2024-01-28 2024-12-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/glm130b/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/glm130b/gpt_model.py 80 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/glm130b/language_model.py 434 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/glm130b/transformer.py 875 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/chatglm/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/chatglm/gpt_model.py 82 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/chatglm/language_model.py 473 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/chatglm/transformer.py 604 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/chatglm/positional_embeddings.py 60 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/layer_specs.py 120 3 353 119 3 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/__init__.py 1 2 353 119 2 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/multi_latent_attention.py 276 2 119 17 2 2 2025-01-16 2025-04-28 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/deepseek_v2/transformer_layer.py 226 4 353 119 4 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/mlp.py 196 1 119 119 1 1 2025-01-16 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/transformer_block.py 377 4 353 119 4 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/transformer_config.py 42 5 353 17 5 3 2024-05-27 2025-04-28 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/deepseek_v2/moe/shared_experts.py 180 1 119 119 1 1 2025-01-16 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/moe/moe_layer.py 118 2 353 119 2 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/moe/experts.py 676 2 353 119 2 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/deepseek_v2/model.py 181 2 353 119 2 1 2024-05-27 2025-01-16 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/layer_specs.py 86 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/__init__.py 1 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/transformer/attention.py 322 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/transformer/mlp.py 131 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/transformer_config.py 142 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/grouped_gemm_util.py 12 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/__init__.py 1 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/token_dispatcher.py 172 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/moe_layer.py 57 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/moe_utils.py 39 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/experts.py 136 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/moe/router.py 113 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mixtral_bak/model.py 162 1 142 142 1 1 2024-12-24 2024-12-24 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava/__init__.py 1 2 560 555 2 2 2023-11-02 2023-11-07 jerry.lp@alibaba-inc.com 38210876+lwmlyy@users.noreply.github.com megatron_patch/model/llava/gpt_model.py 89 2 560 555 2 2 2023-11-02 2023-11-07 jerry.lp@alibaba-inc.com 38210876+lwmlyy@users.noreply.github.com megatron_patch/model/llava/language_model.py 507 7 560 505 7 4 2023-11-02 2023-12-27 jerry.lp@alibaba-inc.com jerryli1981@users.noreply.github.com megatron_patch/model/llava/transformer.py 1292 3 560 534 3 3 2023-11-02 2023-11-28 jerry.lp@alibaba-inc.com jerryli1981@users.noreply.github.com megatron_patch/model/llava/clip_encoder.py 75 5 560 503 5 3 2023-11-02 2023-12-29 jerry.lp@alibaba-inc.com jerryli1981@users.noreply.github.com megatron_patch/model/llava/rotary_pos_embedding.py 54 2 560 555 2 2 2023-11-02 2023-11-07 jerry.lp@alibaba-inc.com 38210876+lwmlyy@users.noreply.github.com megatron_patch/model/llava/mm_projector_builder.py 37 3 560 503 3 3 2023-11-02 2023-12-29 jerry.lp@alibaba-inc.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3_1/layer_specs.py 85 1 265 265 1 1 2024-08-23 2024-08-23 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/llama3_1/__init__.py 1 1 265 265 1 1 2024-08-23 2024-08-23 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/llama3_1/rms_norm.py 13 1 265 265 1 1 2024-08-23 2024-08-23 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/llama3_1/transformer_config.py 9 1 265 265 1 1 2024-08-23 2024-08-23 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/llama3_1/model.py 201 1 265 265 1 1 2024-08-23 2024-08-23 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5_megablocks/__init__.py 1 1 395 395 1 1 2024-04-15 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5_megablocks/gpt_model.py 88 1 395 395 1 1 2024-04-15 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5_megablocks/language_model.py 453 1 395 395 1 1 2024-04-15 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5_megablocks/transformer.py 1184 2 395 392 2 1 2024-04-15 2024-04-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5_megablocks/rotary_pos_embedding.py 56 1 395 395 1 1 2024-04-15 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/layer_specs.py 92 2 420 395 2 1 2024-03-21 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/__init__.py 1 1 420 420 1 1 2024-03-21 2024-03-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/transformer/attention.py 402 3 420 351 3 1 2024-03-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/transformer/mlp.py 164 4 420 351 4 1 2024-03-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/transformer_config.py 7 2 420 367 2 1 2024-03-21 2024-05-13 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/moe/__init__.py 1 1 420 420 1 1 2024-03-21 2024-03-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/moe/token_dispatcher.py 282 3 420 351 3 1 2024-03-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/moe/moe_layer.py 78 4 420 326 4 1 2024-03-21 2024-06-23 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/moe/experts.py 188 2 420 395 2 1 2024-03-21 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/moe/router.py 139 4 420 349 4 1 2024-03-21 2024-05-31 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen1_5/model.py 144 3 420 351 3 1 2024-03-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mistral/__init__.py 1 1 555 555 2 2 2023-11-07 2023-11-07 38210876+lwmlyy@users.noreply.github.com jerry.lp@alibaba-inc.com megatron_patch/model/mistral/gpt_model.py 88 1 555 555 2 2 2023-11-07 2023-11-07 38210876+lwmlyy@users.noreply.github.com jerry.lp@alibaba-inc.com megatron_patch/model/mistral/language_model.py 466 3 555 393 4 3 2023-11-07 2024-04-17 38210876+lwmlyy@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mistral/transformer.py 1292 2 555 552 3 3 2023-11-07 2023-11-10 38210876+lwmlyy@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mistral/rotary_pos_embedding.py 36 2 555 552 3 3 2023-11-07 2023-11-10 38210876+lwmlyy@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/mistral/modeling_attn_mask_utils.py 121 1 552 552 1 1 2023-11-10 2023-11-10 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/layer_specs.py 108 2 337 170 2 2 2024-06-12 2024-11-26 jerryli1981@users.noreply.github.com 676857171@qq.com megatron_patch/model/qwen2/transformer_layer.py 147 1 337 337 1 1 2024-06-12 2024-06-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/rms_norm.py 13 1 337 337 1 1 2024-06-12 2024-06-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/transformer_block.py 323 2 337 328 2 1 2024-06-12 2024-06-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/transformer/attention.py 407 1 337 337 1 1 2024-06-12 2024-06-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/transformer/mlp.py 258 3 337 170 3 2 2024-06-12 2024-11-26 jerryli1981@users.noreply.github.com 676857171@qq.com megatron_patch/model/qwen2/transformer_config.py 14 3 337 328 3 1 2024-06-12 2024-06-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/moe/__init__.py 1 1 337 337 1 1 2024-06-12 2024-06-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/moe/token_dispatcher.py 327 2 337 330 2 1 2024-06-12 2024-06-19 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/moe/moe_layer.py 114 7 337 96 8 4 2024-06-12 2025-02-08 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen2/moe/experts.py 316 3 337 170 3 2 2024-06-12 2024-11-26 jerryli1981@users.noreply.github.com 676857171@qq.com megatron_patch/model/qwen2/moe/router.py 206 3 337 326 3 1 2024-06-12 2024-06-23 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2/model.py 146 2 337 262 2 1 2024-06-12 2024-08-26 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan/gpt_model.py 106 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan/language_model.py 515 2 619 581 2 1 2023-09-04 2023-10-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan/transformer.py 1179 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/gpt_model.py 81 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/language_model.py 411 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/transformer.py 811 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/positional_embeddings.py 122 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/bloom/layers.py 87 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/galactica/__init__.py 1 2 619 616 2 2 2023-09-04 2023-09-07 jerryli1981@users.noreply.github.com bdshichen@sina.cn megatron_patch/model/galactica/gpt_model.py 94 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/galactica/language_model.py 501 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/galactica/transformer.py 570 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama/gpt_model.py 92 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama/language_model.py 501 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama/transformer.py 715 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama/positional_embeddings.py 54 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/layer_specs.py 99 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/__init__.py 1 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/llava_model.py 424 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/llava_spec.py 75 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/transformer_config.py 133 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/vision/vit_layer_specs.py 79 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/vision/__init__.py 1 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/vision/clip_vit_model.py 130 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llava_mcore/vision/multimodal_projector.py 41 1 175 175 1 1 2024-11-21 2024-11-21 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan2/gpt_model.py 97 2 604 552 5 2 2023-09-19 2023-11-10 38210876+lwmlyy@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan2/language_model.py 450 4 604 581 6 3 2023-09-19 2023-10-12 jerryli1981@users.noreply.github.com 38210876+lwmlyy@users.noreply.github.com megatron_patch/model/baichuan2/transformer.py 1292 5 604 574 6 3 2023-09-19 2023-10-19 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/baichuan2/layers.py 134 1 603 603 1 1 2023-09-20 2023-09-20 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen_vl/__init__.py 1 1 505 505 1 1 2023-12-27 2023-12-27 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen_vl/gpt_model.py 89 1 505 505 1 1 2023-12-27 2023-12-27 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen_vl/language_model.py 481 1 505 505 1 1 2023-12-27 2023-12-27 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen_vl/visual.py 296 2 505 503 2 1 2023-12-27 2023-12-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen_vl/transformer.py 1292 1 505 505 1 1 2023-12-27 2023-12-27 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen3_moe/gpt_layer_specs.py 347 1 16 16 1 1 2025-04-29 2025-04-29 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen3_moe/moe_module_specs.py 61 1 16 16 1 1 2025-04-29 2025-04-29 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen3_moe/moe/moe_layer.py 70 1 16 16 1 1 2025-04-29 2025-04-29 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen3_moe/moe/moe_utils.py 79 1 16 16 1 1 2025-04-29 2025-04-29 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/qwen3_moe/moe/router.py 111 1 16 16 1 1 2025-04-29 2025-04-29 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/model/falcon/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon/gpt_model.py 94 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon/language_model.py 491 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/falcon/transformer.py 845 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama2/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama2/gpt_model.py 88 2 619 583 3 3 2023-09-04 2023-10-10 jerryli1981@users.noreply.github.com jerry.lp@alibaba-inc.com megatron_patch/model/llama2/language_model.py 454 8 619 436 11 3 2023-09-04 2024-03-05 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama2/transformer.py 1296 13 619 442 17 4 2023-09-04 2024-02-28 jerryli1981@users.noreply.github.com 38210876+lwmlyy@users.noreply.github.com megatron_patch/model/llama2/rotary_pos_embedding.py 56 4 582 535 5 3 2023-10-11 2023-11-27 jerry.lp@alibaba-inc.com lwmlyy@163.com megatron_patch/model/llama3/layer_specs.py 85 2 357 308 2 1 2024-05-23 2024-07-11 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/__init__.py 1 1 357 357 1 1 2024-05-23 2024-05-23 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/gpt_model.py 88 2 389 351 2 1 2024-04-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/language_model.py 438 3 389 351 3 1 2024-04-21 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/rms_norm.py 13 1 308 308 1 1 2024-07-11 2024-07-11 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/transformer/attention.py 402 2 357 351 2 1 2024-05-23 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/transformer/mlp.py 150 2 357 351 2 1 2024-05-23 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/transformer_config.py 8 1 308 308 1 1 2024-07-11 2024-07-11 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/transformer_legacy.py 1252 2 357 351 2 1 2024-05-23 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/llama3/model.py 144 2 357 351 2 1 2024-05-23 2024-05-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen2_moe/layer_specs.py 281 1 64 64 1 1 2025-03-12 2025-03-12 qianwan@ymail.com qianwan@ymail.com megatron_patch/model/qwen2_moe/__init__.py 1 1 64 64 1 1 2025-03-12 2025-03-12 qianwan@ymail.com qianwan@ymail.com megatron_patch/model/qwen2_moe/transformer_config.py 55 1 64 64 1 1 2025-03-12 2025-03-12 qianwan@ymail.com qianwan@ymail.com megatron_patch/model/qwen/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen/gpt_model.py 88 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen/language_model.py 440 2 619 581 2 1 2023-09-04 2023-10-12 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/qwen/transformer.py 1243 5 619 468 5 1 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/__init__.py 1 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/gpt_model.py 83 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/language_model.py 387 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/glu_activations.py 32 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/transformer.py 848 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/model/starcoder/enums.py 19 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/__init__.py 1 2 619 616 2 2 2023-09-04 2023-09-07 jerryli1981@users.noreply.github.com bdshichen@sina.cn megatron_patch/finetune_utils.py 202 6 619 336 8 4 2023-09-04 2024-06-13 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/training.py 612 8 619 468 9 3 2023-09-04 2024-02-02 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/arguments.py 449 35 619 17 37 5 2023-09-04 2025-04-28 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/template/helper.py 115 6 83 6 6 2 2025-02-21 2025-05-09 jerryli1981@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/lm_evaluate.py 139 3 468 395 3 2 2024-02-02 2024-04-15 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tokenizer/tokenization_baichuan.py 139 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tokenizer/tokenization_qwen_vl.py 441 2 505 503 2 1 2023-12-27 2023-12-29 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tokenizer/tokenization_yi.py 166 1 544 544 1 1 2023-11-18 2023-11-18 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tokenizer/jiebabpe_tokenizer.py 53 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tokenizer/icetk_glm130b_tokenizer.py 273 1 619 619 1 1 2023-09-04 2023-09-04 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com megatron_patch/tensor_parallel.py 66 1 139 139 1 1 2024-12-27 2024-12-27 46404040+lostkevin@users.noreply.github.com 46404040+lostkevin@users.noreply.github.com megatron_patch/initialize.py 91 2 505 473 2 1 2023-12-27 2024-01-28 jerryli1981@users.noreply.github.com jerryli1981@users.noreply.github.com