path # lines of code pyproject.toml 61 src/nanotron/metrics_logging.py 166 src/nanotron/data/clm_collator.py 158 src/nanotron/data/tokenized_bytes.py 572 src/nanotron/data/utils.py 12 src/nanotron/data/s3_utils.py 58 src/nanotron/data/sft_processing.py 57 src/nanotron/data/samplers.py 245 src/nanotron/data/nanoset.py 164 src/nanotron/data/processing.py 109 src/nanotron/data/__init__.py 25 src/nanotron/data/dataloader.py 250 src/nanotron/data/nemo_dataset/dataset_utils.py 65 src/nanotron/data/nemo_dataset/blendable_dataset.py 177 src/nanotron/data/nemo_dataset/indexed_dataset.py 225 src/nanotron/data/nemo_dataset/__init__.py 690 src/nanotron/data/nemo_dataset/helpers.cpp 501 src/nanotron/data/dataloader_builder.py 69 src/nanotron/nn/rotary.py 150 src/nanotron/nn/llama3_ring_attention.py 653 src/nanotron/nn/ring_attention.py 481 src/nanotron/nn/activations.py 117 src/nanotron/nn/moe.py 136 src/nanotron/nn/__init__.py 1 src/nanotron/nn/ring_attention_lucidrain.py 1315 src/nanotron/nn/attention.py 167 src/nanotron/nn/flex_attention.py 205 src/nanotron/nn/layer_norm.py 57 src/nanotron/logging/logmixin.py 31 src/nanotron/logging/base.py 273 src/nanotron/logging/timers.py 220 src/nanotron/logging/__init__.py 55 src/nanotron/eval/evaluation_tasks.py 345 src/nanotron/eval/upload_to_wandb.py 64 src/nanotron/eval/one_job_runner.py 175 src/nanotron/eval/__init__.py 1 src/nanotron/helpers.py 633 src/nanotron/optim/named_optimizer.py 57 src/nanotron/optim/inherit_from_other_optimizer.py 52 src/nanotron/optim/base.py 126 src/nanotron/optim/optimizer_from_gradient_accumulator.py 57 src/nanotron/optim/clip_grads.py 63 src/nanotron/optim/__init__.py 12 src/nanotron/optim/zero.py 323 src/nanotron/optim/gradient_accumulator.py 250 src/nanotron/generation/decode.py 649 src/nanotron/generation/sampler.py 166 src/nanotron/generation/__init__.py 2 src/nanotron/generation/generate_store.py 35 src/nanotron/utils.py 103 src/nanotron/config/config.py 515 src/nanotron/config/lighteval_config.py 109 src/nanotron/config/parallelism_config.py 33 src/nanotron/config/__init__.py 4 src/nanotron/config/utils_config.py 99 src/nanotron/config/models_config.py 211 src/nanotron/trainer.py 1104 src/nanotron/parallel/pipeline_parallel/tensor_pointer.py 4 src/nanotron/parallel/pipeline_parallel/state.py 194 src/nanotron/parallel/pipeline_parallel/engine.py 235 src/nanotron/parallel/pipeline_parallel/utils.py 26 src/nanotron/parallel/pipeline_parallel/functional.py 79 src/nanotron/parallel/pipeline_parallel/__init__.py 1 src/nanotron/parallel/pipeline_parallel/block.py 115 src/nanotron/parallel/pipeline_parallel/context_manager.py 19 src/nanotron/parallel/pipeline_parallel/p2p.py 390 src/nanotron/parallel/parameters.py 131 src/nanotron/parallel/tensor_parallel/nn.py 245 src/nanotron/parallel/tensor_parallel/enum.py 8 src/nanotron/parallel/tensor_parallel/functional.py 487 src/nanotron/parallel/tensor_parallel/__init__.py 1 src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py 81 src/nanotron/parallel/utils.py 27 src/nanotron/parallel/sharded_parameters.py 86 src/nanotron/parallel/tied_parameters.py 123 src/nanotron/parallel/data_parallel/utils.py 32 src/nanotron/parallel/__init__.py 1 src/nanotron/parallel/context.py 124 src/nanotron/models/llama.py 926 src/nanotron/models/starcoder2.py 1228 src/nanotron/models/base.py 219 src/nanotron/models/__init__.py 1 src/nanotron/models/qwen.py 810 src/nanotron/s3_checkpoints/s3_mover.py 332 src/nanotron/s3_checkpoints/fsspec.py 25 src/nanotron/s3_checkpoints/__init__.py 3 src/nanotron/sanity_checks.py 241 src/nanotron/distributed.py 179 src/nanotron/fp8/tensor.py 58 src/nanotron/fp8/parameter.py 26 src/nanotron/fp8/utils.py 9 src/nanotron/fp8/meta.py 25 src/nanotron/fp8/linear.py 72 src/nanotron/fp8/kernel.py 55 src/nanotron/fp8/__init__.py 10 src/nanotron/fp8/dtypes.py 5 src/nanotron/fp8/constants.py 9 src/nanotron/__init__.py 1 src/nanotron/random.py 105 src/nanotron/scaling/parametrization.py 162 src/nanotron/serialize/metadata.py 140 src/nanotron/serialize/optimizer.py 265 src/nanotron/serialize/utils.py 66 src/nanotron/serialize/weights.py 293 src/nanotron/serialize/main.py 223 src/nanotron/serialize/__init__.py 5 src/nanotron/serialize/random.py 25 src/nanotron/constants.py 6 scripts/fix_checkpoint_bad_naming.py 23 scripts/scaling_benchmarks.py 382 scripts/log_lighteval_to_wandb.py 79 scripts/weka.py 71 run_train.py 281 tools/preprocess_data.py 87 slurm_launcher.py 378