Path Lines of Code CODE_OF_CONDUCT.md 93 CONTRIBUTING.md 162 README.md 141 examples/__init__.py 1 examples/bench_llama_7b.py 73 examples/config_nanoset.yaml 110 examples/config_qwen.py 179 examples/config_qwen.yaml 128 examples/config_qwen_with_moe.yaml 132 examples/config_resume_training.py 97 examples/config_resume_training.yaml 109 examples/config_tiny_llama.py 108 examples/config_tiny_llama.yaml 92 examples/config_tiny_llama_with_s3_upload.yaml 115 examples/contributor-guide/README.md 66 examples/contributor-guide/debug_config_tiny_llama.py 95 examples/contributor-guide/debug_config_tiny_llama.yaml 107 examples/contributor-guide/debug_tiny_llama.sh 14 examples/custom-dataloader/README.md 31 examples/custom-dataloader/config_custom_dl.yaml 103 examples/custom-dataloader/run_train.py 164 examples/doremi/README.md 57 examples/doremi/__init__.py 1 examples/doremi/configs/config_2.8b_llama.yaml 89 examples/doremi/configs/config_2.8b_llama_with_tuned_weights.yaml 89 examples/doremi/configs/config_280m_llama.yaml 90 examples/doremi/configs/config_280m_llama_proxy.yaml 92 examples/doremi/doremi/__init__.py 1 examples/doremi/doremi/config.py 29 examples/doremi/doremi/dataloader.py 262 examples/doremi/doremi/doremi_context.py 30 examples/doremi/doremi/llama.py 216 examples/doremi/doremi/loss.py 126 examples/doremi/doremi/trainer.py 218 examples/doremi/doremi/utils.py 11 examples/doremi/requirements.txt 1 examples/doremi/tests/test_doremi_context.py 36 examples/doremi/tests/test_doremi_dataloader.py 33 examples/doremi/tests/test_doremi_loss.py 235 examples/doremi/tests/test_doremi_sampler.py 356 examples/doremi/tests/test_doremi_utils.py 13 examples/doremi/tests/utils.py 11 examples/doremi/train_doremi.py 20 examples/doremi/train_reference.py 26 examples/doremi/utils.py 4 examples/inference/qwen_moe/README.md 18 examples/inference/qwen_moe/convert.py 252 examples/llama/README.md 15 examples/llama/__init__.py 1 examples/llama/convert_hf_to_nanotron.py 82 examples/llama/convert_nanotron_to_hf.py 119 examples/llama/convert_weights.py 137 examples/llama/requirements.txt 1 examples/llama/tests/test_conversion.py 273 examples/llama/tests/utils.py 9 examples/mamba/README.md 26 examples/mamba/config.py 39 examples/mamba/config_mamba.yaml 104 examples/mamba/convert_hf_to_nanotron.py 202 examples/mamba/convert_nanotron_to_hf.py 146 examples/mamba/create_config_mamba.py 138 examples/mamba/mamba.py 710 examples/mamba/requirements.txt 5 examples/mamba/run_generate.py 196 examples/mamba/run_multinode.sh 95 examples/mamba/selective_scan_interface.py 446 examples/mamba/train_mamba.py 20 examples/mamba/train_mamba.sh 13 examples/mamba/trainer.py 122 examples/moe/README.md 27 examples/moe/config_llamoe.py 135 examples/moe/config_llamoe.yaml 112 examples/moe/llamoe.py 760 examples/moe/moe.py 352 examples/moe/requirements.txt 2 examples/moe/train_moe.py 21 examples/mup/README.md 20 examples/mup/configs/mup_350m_llama_config.yaml 133 examples/mup/configs/sp_350m_llama_config.yaml 106 examples/train_tiny_llama.sh 13 src/nanotron/eval/README.md 8 src/nanotron/parallel/pipeline_parallel/README.md 18 tests/pytest.ini 4