id: 1 unit: def main() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 215 LOC McCabe index: 63 number of parameters: 0 id: 2 unit: py::array build_mapping_impl() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 217 end line: 450 size: 164 LOC McCabe index: 30 number of parameters: 9 id: 3 unit: def _forward_inference() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 150 LOC McCabe index: 9 number of parameters: 8 id: 4 unit: py::array build_blocks_mapping_impl() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 483 end line: 698 size: 149 LOC McCabe index: 29 number of parameters: 9 id: 5 unit: def forward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 111 LOC McCabe index: 15 number of parameters: 7 id: 6 unit: def create_nanotron_config() file: slurm_launcher.py start line: 0 end line: 0 size: 106 LOC McCabe index: 5 number of parameters: 1 id: 7 unit: def parse_args() file: slurm_launcher.py start line: 0 end line: 0 size: 102 LOC McCabe index: 1 number of parameters: 0 id: 8 unit: def backward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 96 LOC McCabe index: 24 number of parameters: 2 id: 9 unit: def data_generator() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 91 LOC McCabe index: 14 number of parameters: 0 id: 10 unit: def _get_text() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 84 LOC McCabe index: 29 number of parameters: 2 id: 11 unit: def backward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 76 LOC McCabe index: 6 number of parameters: 2 id: 12 unit: def pre_training() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 74 LOC McCabe index: 12 number of parameters: 3 id: 13 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 72 LOC McCabe index: 24 number of parameters: 1 id: 14 unit: def _update_dataloader_based_on_training_stages() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 72 LOC McCabe index: 10 number of parameters: 3 id: 15 unit: def __call__() file: src/nanotron/data/clm_collator.py start line: 0 end line: 0 size: 68 LOC McCabe index: 27 number of parameters: 3 id: 16 unit: def lr_scheduler_builder() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 68 LOC McCabe index: 22 number of parameters: 3 id: 17 unit: def __call__() file: src/nanotron/data/clm_collator.py start line: 0 end line: 0 size: 67 LOC McCabe index: 33 number of parameters: 3 id: 18 unit: py::array build_sample_idx() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 100 end line: 198 size: 67 LOC McCabe index: 6 number of parameters: 7 id: 19 unit: def forward() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 66 LOC McCabe index: 22 number of parameters: 2 id: 20 unit: def save_weights() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 62 LOC McCabe index: 13 number of parameters: 3 id: 21 unit: def init_model_randomly() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 61 LOC McCabe index: 20 number of parameters: 2 id: 22 unit: def parse_ckpt_path() file: src/nanotron/serialize/main.py start line: 0 end line: 0 size: 61 LOC McCabe index: 8 number of parameters: 2 id: 23 unit: def write_to_csv() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 59 LOC McCabe index: 17 number of parameters: 4 id: 24 unit: def _forward_packed() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 59 LOC McCabe index: 8 number of parameters: 5 id: 25 unit: def get_args() file: tools/preprocess_data.py start line: 0 end line: 0 size: 56 LOC McCabe index: 3 number of parameters: 0 id: 26 unit: def _bwd_kernel() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 55 LOC McCabe index: 1 number of parameters: 0 id: 27 unit: def set_logger_verbosity_format() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 55 LOC McCabe index: 25 number of parameters: 2 id: 28 unit: def backward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 52 LOC McCabe index: 11 number of parameters: 2 id: 29 unit: def run() file: scripts/log_lighteval_to_wandb.py start line: 0 end line: 0 size: 49 LOC McCabe index: 13 number of parameters: 1 id: 30 unit: def _partition_parameters() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 48 LOC McCabe index: 20 number of parameters: 1 id: 31 unit: def fp32_accum_hook() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 48 LOC McCabe index: 10 number of parameters: 2 id: 32 unit: def get_cpu_logitems() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 48 LOC McCabe index: 6 number of parameters: 0 id: 33 unit: def eval_single_checkpoint() file: src/nanotron/eval/one_job_runner.py start line: 0 end line: 0 size: 47 LOC McCabe index: 11 number of parameters: 2 id: 34 unit: def new_build_nanoset_index() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 46 LOC McCabe index: 6 number of parameters: 1 id: 35 unit: def _init_parallel_groups() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 46 LOC McCabe index: 15 number of parameters: 1 id: 36 unit: def main() file: slurm_launcher.py start line: 0 end line: 0 size: 45 LOC McCabe index: 10 number of parameters: 0 id: 37 unit: def _fwd_kernel() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 44 LOC McCabe index: 1 number of parameters: 0 id: 38 unit: def custom_load_state_dict() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 44 LOC McCabe index: 20 number of parameters: 4 id: 39 unit: def save_checkpoint() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 44 LOC McCabe index: 4 number of parameters: 1 id: 40 unit: def __call__() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 43 LOC McCabe index: 7 number of parameters: 2 id: 41 unit: def push_to_wandb() file: src/nanotron/eval/upload_to_wandb.py start line: 0 end line: 0 size: 41 LOC McCabe index: 15 number of parameters: 6 id: 42 unit: def init_model_randomly() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 41 LOC McCabe index: 9 number of parameters: 2 id: 43 unit: def init_model_randomly() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 41 LOC McCabe index: 9 number of parameters: 2 id: 44 unit: def save_experiment_configs() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 41 LOC McCabe index: 6 number of parameters: 3 id: 45 unit: def get_warmup_percentages() file: scripts/weka.py start line: 0 end line: 0 size: 41 LOC McCabe index: 9 number of parameters: 2 id: 46 unit: void build_blending_indices() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 35 end line: 97 size: 40 LOC McCabe index: 7 number of parameters: 7 id: 47 unit: def _load_model_checkpoint() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 40 LOC McCabe index: 11 number of parameters: 2 id: 48 unit: def pre_save_checkpoint() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 40 LOC McCabe index: 10 number of parameters: 1 id: 49 unit: def _get_dataset_mix() file: src/nanotron/data/processing.py start line: 0 end line: 0 size: 39 LOC McCabe index: 11 number of parameters: 3 id: 50 unit: def _bwd_kernel_one_col_block() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 36 LOC McCabe index: 1 number of parameters: 0 id: 51 unit: def _recv_meta() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 36 LOC McCabe index: 2 number of parameters: 3 id: 52 unit: def _update_cos_sin_cache() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 35 LOC McCabe index: 17 number of parameters: 4 id: 53 unit: def compute_global_hidden_layer_metrics() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 34 LOC McCabe index: 16 number of parameters: 2 id: 54 unit: def _build_sample_idx() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 34 LOC McCabe index: 11 number of parameters: 7 id: 55 unit: def __call__() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 34 LOC McCabe index: 8 number of parameters: 2 id: 56 unit: def __post_init__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 33 LOC McCabe index: 9 number of parameters: 1 id: 57 unit: def __post_init__() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 33 LOC McCabe index: 10 number of parameters: 1 id: 58 unit: def run_communication() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 33 LOC McCabe index: 9 number of parameters: 2 id: 59 unit: def writer() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 32 LOC McCabe index: 2 number of parameters: 3 id: 60 unit: def __init__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 32 LOC McCabe index: 2 number of parameters: 3 id: 61 unit: def _send_meta() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 32 LOC McCabe index: 2 number of parameters: 4 id: 62 unit: def serialize() file: src/nanotron/config/utils_config.py start line: 0 end line: 0 size: 31 LOC McCabe index: 16 number of parameters: 1 id: 63 unit: def sanity_check_dataloader() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 31 LOC McCabe index: 7 number of parameters: 3 id: 64 unit: def process_sft() file: src/nanotron/data/sft_processing.py start line: 0 end line: 0 size: 30 LOC McCabe index: 13 number of parameters: 3 id: 65 unit: def __call__() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 30 LOC McCabe index: 9 number of parameters: 2 id: 66 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 30 LOC McCabe index: 15 number of parameters: 1 id: 67 unit: def backward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 30 LOC McCabe index: 7 number of parameters: 2 id: 68 unit: def _all_gather_params() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 29 LOC McCabe index: 11 number of parameters: 1 id: 69 unit: def checkpoint_method() file: src/nanotron/utils.py start line: 0 end line: 0 size: 29 LOC McCabe index: 7 number of parameters: 1 id: 70 unit: def _forward_training() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 29 LOC McCabe index: 2 number of parameters: 7 id: 71 unit: def __iter__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 28 LOC McCabe index: 10 number of parameters: 1 id: 72 unit: def __init__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 28 LOC McCabe index: 4 number of parameters: 5 id: 73 unit: def _build_dataset() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 27 LOC McCabe index: 2 number of parameters: 2 id: 74 unit: py::array build_mapping() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 453 end line: 480 size: 27 LOC McCabe index: 4 number of parameters: 9 id: 75 unit: def forward() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 27 LOC McCabe index: 16 number of parameters: 4 id: 76 unit: def apply_rotary_pos_emb() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 27 LOC McCabe index: 9 number of parameters: 6 id: 77 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 27 LOC McCabe index: 5 number of parameters: 1 id: 78 unit: def init_model() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 27 LOC McCabe index: 4 number of parameters: 1 id: 79 unit: def update() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 27 LOC McCabe index: 8 number of parameters: 1 id: 80 unit: def compute_tensor_stats() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 26 LOC McCabe index: 2 number of parameters: 1 id: 81 unit: def all_ring_pass() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 26 LOC McCabe index: 7 number of parameters: 5 id: 82 unit: def run_communication() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 26 LOC McCabe index: 12 number of parameters: 2 id: 83 unit: def get_train_valid_test_split_() file: src/nanotron/data/nemo_dataset/dataset_utils.py start line: 0 end line: 0 size: 25 LOC McCabe index: 10 number of parameters: 2 id: 84 unit: def unflatten_varlen_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 25 LOC McCabe index: 1 number of parameters: 3 id: 85 unit: def unflatten_varlen_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 25 LOC McCabe index: 1 number of parameters: 3 id: 86 unit: def basic_optimizer_builder() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 25 LOC McCabe index: 3 number of parameters: 1 id: 87 unit: def test_equal_dict() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 25 LOC McCabe index: 5 number of parameters: 3 id: 88 unit: def get_profiler() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 25 LOC McCabe index: 4 number of parameters: 1 id: 89 unit: def distributed_wait_for_completion() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 25 LOC McCabe index: 12 number of parameters: 2 id: 90 unit: def collect_parameter_metrics() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 24 LOC McCabe index: 13 number of parameters: 2 id: 91 unit: def prepare_sft_dataset() file: src/nanotron/data/sft_processing.py start line: 0 end line: 0 size: 24 LOC McCabe index: 11 number of parameters: 5 id: 92 unit: def test_index_blending() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 24 LOC McCabe index: 3 number of parameters: 1 id: 93 unit: def backward() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 24 LOC McCabe index: 1 number of parameters: 3 id: 94 unit: def end() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 24 LOC McCabe index: 11 number of parameters: 1 id: 95 unit: def flush() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 24 LOC McCabe index: 5 number of parameters: 1 id: 96 unit: def main() file: tools/preprocess_data.py start line: 0 end line: 0 size: 24 LOC McCabe index: 2 number of parameters: 1 id: 97 unit: py::array build_blocks_mapping() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 700 end line: 723 size: 23 LOC McCabe index: 4 number of parameters: 9 id: 98 unit: def sync_gradients_across_dp() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 23 LOC McCabe index: 7 number of parameters: 4 id: 99 unit: def __call__() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 23 LOC McCabe index: 7 number of parameters: 2 id: 100 unit: def isend_tensors() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 23 LOC McCabe index: 5 number of parameters: 4 id: 101 unit: def init_rotary_embeddings() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 23 LOC McCabe index: 5 number of parameters: 1 id: 102 unit: def _post_uploading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 23 LOC McCabe index: 9 number of parameters: 1 id: 103 unit: def __iter__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 22 LOC McCabe index: 9 number of parameters: 1 id: 104 unit: def build_dataset() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 21 LOC McCabe index: 3 number of parameters: 2 id: 105 unit: def elapsed() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 21 LOC McCabe index: 8 number of parameters: 1 id: 106 unit: def initialize_torch_distributed() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 21 LOC McCabe index: 3 number of parameters: 0 id: 107 unit: def llama3_flash_attn_varlen_backward() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 18 id: 108 unit: def flatten_varlen_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 2 id: 109 unit: def backward() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 3 id: 110 unit: def flatten_varlen_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 2 id: 111 unit: def print_config_details() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 1 id: 112 unit: def from_metadata() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 20 LOC McCabe index: 1 number of parameters: 3 id: 113 unit: def forward() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 19 LOC McCabe index: 1 number of parameters: 0 id: 114 unit: def all_gather_variable_dim() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 19 LOC McCabe index: 3 number of parameters: 3 id: 115 unit: def start() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 19 LOC McCabe index: 8 number of parameters: 1 id: 116 unit: def post_init() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 19 LOC McCabe index: 6 number of parameters: 1 id: 117 unit: def forward() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 19 LOC McCabe index: 7 number of parameters: 2 id: 118 unit: def get_flops_per_sec() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 19 LOC McCabe index: 2 number of parameters: 4 id: 119 unit: def get_flops_per_sec() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 19 LOC McCabe index: 3 number of parameters: 4 id: 120 unit: def _post_downloading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 19 LOC McCabe index: 10 number of parameters: 1 id: 121 unit: def _post_removing() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 19 LOC McCabe index: 6 number of parameters: 1 id: 122 unit: def __init__() file: src/nanotron/fp8/linear.py start line: 0 end line: 0 size: 19 LOC McCabe index: 5 number of parameters: 5 id: 123 unit: def llama3_flash_attn_varlen_func() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 18 LOC McCabe index: 1 number of parameters: 14 id: 124 unit: def log_libraries_versions() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 18 LOC McCabe index: 2 number of parameters: 1 id: 125 unit: def log_all() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 18 LOC McCabe index: 10 number of parameters: 4 id: 126 unit: def forward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 18 LOC McCabe index: 3 number of parameters: 3 id: 127 unit: def forward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 18 LOC McCabe index: 3 number of parameters: 3 id: 128 unit: def tie_custom_params() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 18 LOC McCabe index: 5 number of parameters: 1 id: 129 unit: def __iter__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 17 LOC McCabe index: 8 number of parameters: 1 id: 130 unit: def llama3_flash_attn_varlen_forward() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 15 id: 131 unit: def llama3_flash_attn_varlen_kvpacked_func() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 13 id: 132 unit: def flash_attn_forward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 0 id: 133 unit: def flash_attn_backward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 0 id: 134 unit: def get_logger() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 17 LOC McCabe index: 8 number of parameters: 2 id: 135 unit: def get_position_ids() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 17 LOC McCabe index: 9 number of parameters: 2 id: 136 unit: def cos_sin() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 17 LOC McCabe index: 3 number of parameters: 5 id: 137 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 17 LOC McCabe index: 4 number of parameters: 4 id: 138 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 17 LOC McCabe index: 3 number of parameters: 2 id: 139 unit: def _start_removing() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 17 LOC McCabe index: 8 number of parameters: 1 id: 140 unit: def __init__() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 17 LOC McCabe index: 3 number of parameters: 2 id: 141 unit: def create_config() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 17 LOC McCabe index: 1 number of parameters: 0 id: 142 unit: def __init__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 16 id: 143 unit: def get_tb_dataloader() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 0 id: 144 unit: def __getitem__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 16 LOC McCabe index: 4 number of parameters: 2 id: 145 unit: def llama3_flash_attn_varlen_qkvpacked_func() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 12 id: 146 unit: def ring_flash_attn_varlen_backward() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 14 id: 147 unit: def ring_flash_attn_varlen_func() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 11 id: 148 unit: def _bwd_preprocess_do_o_dot() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 0 id: 149 unit: def log() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 16 LOC McCabe index: 8 number of parameters: 5 id: 150 unit: def get_all_comps() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 16 LOC McCabe index: 6 number of parameters: 1 id: 151 unit: def _accumulate_grad() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 16 LOC McCabe index: 2 number of parameters: 3 id: 152 unit: def __post_init__() file: src/nanotron/config/lighteval_config.py start line: 0 end line: 0 size: 16 LOC McCabe index: 6 number of parameters: 1 id: 153 unit: def check_buffers_empty() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 16 LOC McCabe index: 1 number of parameters: 1 id: 154 unit: def get_flops_per_sec() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 16 LOC McCabe index: 2 number of parameters: 4 id: 155 unit: def _core_forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 16 LOC McCabe index: 6 number of parameters: 2 id: 156 unit: def _start_downloading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 16 LOC McCabe index: 4 number of parameters: 2 id: 157 unit: def build_nanoset_index() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 15 LOC McCabe index: 5 number of parameters: 1 id: 158 unit: def get_train_dataloader() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 15 LOC McCabe index: 1 number of parameters: 0 id: 159 unit: def _build_shuffle_idx() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 3 id: 160 unit: def forward() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 15 LOC McCabe index: 1 number of parameters: 0 id: 161 unit: def ring_flash_attn_cuda() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 15 LOC McCabe index: 1 number of parameters: 0 id: 162 unit: def set_formatter() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 15 LOC McCabe index: 6 number of parameters: 1 id: 163 unit: def load_state_dict() file: src/nanotron/optim/named_optimizer.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 4 id: 164 unit: def _set_grad() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 2 id: 165 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 15 LOC McCabe index: 6 number of parameters: 1 id: 166 unit: def create_empty_storage() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 2 id: 167 unit: def _send_recv_second_metadata() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 15 LOC McCabe index: 5 number of parameters: 2 id: 168 unit: def _send_recv_data() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 15 LOC McCabe index: 4 number of parameters: 2 id: 169 unit: def __init__() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 15 LOC McCabe index: 1 number of parameters: 0 id: 170 unit: def create_new_group() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 15 LOC McCabe index: 4 number of parameters: 2 id: 171 unit: def get_sliced_parameter() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 15 LOC McCabe index: 3 number of parameters: 2 id: 172 unit: def _acquire_lock() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 15 LOC McCabe index: 4 number of parameters: 2 id: 173 unit: def save_meta() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 15 LOC McCabe index: 2 number of parameters: 3 id: 174 unit: def build_dataset() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 0 id: 175 unit: def get_indexed_dataset() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 2 id: 176 unit: def _core_forward() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 14 LOC McCabe index: 4 number of parameters: 2 id: 177 unit: def forward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 0 id: 178 unit: def _init_model_instance() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 1 id: 179 unit: def get_pp_rank_of() file: src/nanotron/parallel/pipeline_parallel/utils.py start line: 0 end line: 0 size: 14 LOC McCabe index: 6 number of parameters: 2 id: 180 unit: def attach_pipeline_state_to_model() file: src/nanotron/parallel/pipeline_parallel/context_manager.py start line: 0 end line: 0 size: 14 LOC McCabe index: 4 number of parameters: 2 id: 181 unit: def to_first_metadata() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 14 LOC McCabe index: 2 number of parameters: 2 id: 182 unit: def backward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 14 LOC McCabe index: 3 number of parameters: 3 id: 183 unit: def create_pg_for_tied_weights() file: src/nanotron/parallel/tied_parameters.py start line: 0 end line: 0 size: 14 LOC McCabe index: 6 number of parameters: 2 id: 184 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 3 id: 185 unit: def named_modules_in_pp_rank() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 14 LOC McCabe index: 5 number of parameters: 1 id: 186 unit: def _dispatch_tokens() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 14 LOC McCabe index: 5 number of parameters: 4 id: 187 unit: def backward() file: src/nanotron/fp8/linear.py start line: 0 end line: 0 size: 14 LOC McCabe index: 2 number of parameters: 3 id: 188 unit: def load_meta() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 2 id: 189 unit: def get_checkpoint_version() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 14 LOC McCabe index: 2 number of parameters: 3 id: 190 unit: def get_args() file: scripts/log_lighteval_to_wandb.py start line: 0 end line: 0 size: 14 LOC McCabe index: 1 number of parameters: 0 id: 191 unit: def __init__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 192 unit: def get_tb_datasets() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 193 unit: def _stream_file() file: src/nanotron/data/s3_utils.py start line: 0 end line: 0 size: 13 LOC McCabe index: 4 number of parameters: 4 id: 194 unit: def print_nanoset_info() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 13 LOC McCabe index: 2 number of parameters: 1 id: 195 unit: def __init__() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 196 unit: def build_nanoset_dataloader() file: src/nanotron/data/dataloader_builder.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 197 unit: def ring_flash_attn_varlen_forward() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 11 id: 198 unit: def flex_attention_forward() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 199 unit: def zero_grad() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 13 LOC McCabe index: 9 number of parameters: 1 id: 200 unit: def __new__() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 4 id: 201 unit: def forward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 13 LOC McCabe index: 3 number of parameters: 6 id: 202 unit: def unshape() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 13 LOC McCabe index: 2 number of parameters: 1 id: 203 unit: def get_named_params_with_correct_tied() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 13 LOC McCabe index: 3 number of parameters: 1 id: 204 unit: def __init__() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 205 unit: def __new__() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 13 LOC McCabe index: 3 number of parameters: 3 id: 206 unit: def __eq__() file: src/nanotron/random.py start line: 0 end line: 0 size: 13 LOC McCabe index: 9 number of parameters: 2 id: 207 unit: def branch_random_state() file: src/nanotron/random.py start line: 0 end line: 0 size: 13 LOC McCabe index: 2 number of parameters: 3 id: 208 unit: def _parametrize_mup_weight() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 13 LOC McCabe index: 4 number of parameters: 3 id: 209 unit: def save() file: src/nanotron/serialize/main.py start line: 0 end line: 0 size: 13 LOC McCabe index: 1 number of parameters: 0 id: 210 unit: def _get_next_from_stream() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 1 id: 211 unit: def __len__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 1 id: 212 unit: def __len__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 1 id: 213 unit: def __len__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 1 id: 214 unit: def group_texts() file: src/nanotron/data/processing.py start line: 0 end line: 0 size: 12 LOC McCabe index: 6 number of parameters: 2 id: 215 unit: def _build_index_mappings() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 0 id: 216 unit: def permute() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 0 id: 217 unit: inline int32_t get_target_sample_len() file: src/nanotron/data/nemo_dataset/helpers.cpp start line: 201 end line: 213 size: 12 LOC McCabe index: 3 number of parameters: 3 id: 218 unit: def _bwd_store_dk_dv() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 0 id: 219 unit: def set_verbosity() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 12 LOC McCabe index: 7 number of parameters: 1 id: 220 unit: def human_format() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 12 LOC McCabe index: 5 number of parameters: 3 id: 221 unit: def grad_optimizer_builder() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 1 id: 222 unit: def __init__() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 4 id: 223 unit: def attach_store() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 12 LOC McCabe index: 4 number of parameters: 2 id: 224 unit: def _calculate_model_params() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 12 LOC McCabe index: 2 number of parameters: 1 id: 225 unit: def __post_init__() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 12 LOC McCabe index: 6 number of parameters: 1 id: 226 unit: def __new__() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 12 LOC McCabe index: 2 number of parameters: 3 id: 227 unit: def __init__() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 0 id: 228 unit: def ddp_trigger_sync_in_bwd() file: src/nanotron/parallel/data_parallel/utils.py start line: 0 end line: 0 size: 12 LOC McCabe index: 1 number of parameters: 1 id: 229 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 12 LOC McCabe index: 3 number of parameters: 3 id: 230 unit: def log_modules() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 12 LOC McCabe index: 3 number of parameters: 4 id: 231 unit: def assert_fail_except_rank_with() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 12 LOC McCabe index: 5 number of parameters: 3 id: 232 unit: def state_dict_to_device() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 12 LOC McCabe index: 3 number of parameters: 2 id: 233 unit: def update_checkpoint() file: scripts/fix_checkpoint_bad_naming.py start line: 0 end line: 0 size: 12 LOC McCabe index: 5 number of parameters: 1 id: 234 unit: def check_params() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 12 LOC McCabe index: 3 number of parameters: 1 id: 235 unit: def warmup_datasets() file: scripts/weka.py start line: 0 end line: 0 size: 12 LOC McCabe index: 5 number of parameters: 2 id: 236 unit: def get_attribute_by_path() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 11 LOC McCabe index: 5 number of parameters: 2 id: 237 unit: def collect_embeddings_metrics() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 11 LOC McCabe index: 4 number of parameters: 2 id: 238 unit: def __init__() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 239 unit: def _build_doc_idx() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 11 LOC McCabe index: 3 number of parameters: 4 id: 240 unit: def __init__() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 11 LOC McCabe index: 2 number of parameters: 2 id: 241 unit: def __init__() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 11 LOC McCabe index: 2 number of parameters: 2 id: 242 unit: def flash_attention_forward() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 243 unit: def get_attention_mask() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 11 LOC McCabe index: 3 number of parameters: 2 id: 244 unit: def _get_default_logging_level() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 11 LOC McCabe index: 3 number of parameters: 0 id: 245 unit: def log_memory() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 2 id: 246 unit: def _vocab_size_with_padding() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 11 LOC McCabe index: 2 number of parameters: 3 id: 247 unit: def decode_text() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 248 unit: def __post_init__() file: src/nanotron/config/parallelism_config.py start line: 0 end line: 0 size: 11 LOC McCabe index: 6 number of parameters: 1 id: 249 unit: def post_save_checkpoint() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 11 LOC McCabe index: 7 number of parameters: 1 id: 250 unit: def view_as_contiguous() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 1 id: 251 unit: def __init__() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 252 unit: def _mark_all_parameters_in_module_as_sharded() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 11 LOC McCabe index: 3 number of parameters: 2 id: 253 unit: def init_rotary_embeddings() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 1 id: 254 unit: def init_rotary_embeddings() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 11 LOC McCabe index: 5 number of parameters: 1 id: 255 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 0 id: 256 unit: def get_state_as_int() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 11 LOC McCabe index: 5 number of parameters: 1 id: 257 unit: def convert_torch_dtype_to_te_dtype() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 11 LOC McCabe index: 1 number of parameters: 1 id: 258 unit: def __getitem__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 10 LOC McCabe index: 10 number of parameters: 2 id: 259 unit: def get_sampler() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 260 unit: def __init__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 261 unit: def dummy_infinite_data_generator() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 262 unit: def _do_init() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 3 id: 263 unit: def build_dataset() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 264 unit: def build_train_valid_test_datasets() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 265 unit: def _build_train_valid_test_datasets() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 266 unit: def flatten_kernel() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 267 unit: def unflatten_kernel() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 268 unit: def send_recv() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 3 id: 269 unit: def flatten_kernel() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 270 unit: def unflatten_kernel() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 271 unit: def send_recv() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 3 id: 272 unit: def __init__() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 3 id: 273 unit: def send_and_receive_() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 5 id: 274 unit: def forward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 4 id: 275 unit: def sdpa_attention_forward() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 276 unit: def get_tbi_logs() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 10 LOC McCabe index: 9 number of parameters: 2 id: 277 unit: def log_rank() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 278 unit: def decode_tokenized() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 279 unit: def find_free_port() file: src/nanotron/utils.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 2 id: 280 unit: def check_buffers_empty() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 1 id: 281 unit: def get_sort_key() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 1 id: 282 unit: def add_send() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 4 id: 283 unit: def forward() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 2 id: 284 unit: def _mark_all_parameters_in_module_as_tied() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 10 LOC McCabe index: 4 number of parameters: 1 id: 285 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 4 id: 286 unit: def get_block_compute_costs() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 1 id: 287 unit: def get_block_compute_costs() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 1 id: 288 unit: def get_flops() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 289 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 290 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 4 id: 291 unit: def get_block_compute_costs() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 1 id: 292 unit: def _popen() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 10 LOC McCabe index: 2 number of parameters: 2 id: 293 unit: def get_current_stdout() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 10 LOC McCabe index: 5 number of parameters: 1 id: 294 unit: def _parametrize_grouped_mlp() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 10 LOC McCabe index: 4 number of parameters: 3 id: 295 unit: def _get_mup_lr() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 10 LOC McCabe index: 3 number of parameters: 3 id: 296 unit: def __post_init__() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 10 LOC McCabe index: 6 number of parameters: 1 id: 297 unit: def get_args() file: run_train.py start line: 0 end line: 0 size: 10 LOC McCabe index: 1 number of parameters: 0 id: 298 unit: def compute_kurtosis() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 1 id: 299 unit: def __init__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 300 unit: def __init__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 301 unit: def make_indexed_dataset() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 302 unit: def print_split_stats() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 2 id: 303 unit: def rotate_half() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 2 id: 304 unit: def __init__() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 305 unit: def split_by_rank() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 1 id: 306 unit: def lengths_to_offsets() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 2 id: 307 unit: def generate_doc_mask_mod() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 2 id: 308 unit: def _create_timer_decorator() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 5 id: 309 unit: def create_table_log() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 310 unit: def create_table_output() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 9 LOC McCabe index: 4 number of parameters: 2 id: 311 unit: def generator() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 0 id: 312 unit: def generator() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 0 id: 313 unit: def get_parameter_and_parent_module() file: src/nanotron/utils.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 314 unit: def cast_pipeline_engine_to_str() file: src/nanotron/config/utils_config.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 1 id: 315 unit: def backward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 316 unit: def _send_first_metadata_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 4 id: 317 unit: def _recv_first_metadata_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 3 id: 318 unit: def _send_second_metadata_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 4 id: 319 unit: def forward() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 2 id: 320 unit: def __init__() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 321 unit: def forward() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 322 unit: def backward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 2 id: 323 unit: def initial_sync() file: src/nanotron/parallel/utils.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 2 id: 324 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 4 id: 325 unit: def pad_to_right() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 9 LOC McCabe index: 5 number of parameters: 3 id: 326 unit: def get_flops() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 327 unit: def pad_to_right() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 9 LOC McCabe index: 5 number of parameters: 3 id: 328 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 0 id: 329 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 4 id: 330 unit: def __init__() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 3 id: 331 unit: def ignore_init_on_device_and_dtype() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 9 LOC McCabe index: 3 number of parameters: 0 id: 332 unit: def get_flops() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 333 unit: def _reset_state() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 1 id: 334 unit: def start_uploading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 9 LOC McCabe index: 4 number of parameters: 2 id: 335 unit: def _compute_scaling_factor() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 9 LOC McCabe index: 4 number of parameters: 1 id: 336 unit: def __init__() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 2 id: 337 unit: def process_type() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 9 LOC McCabe index: 4 number of parameters: 4 id: 338 unit: def convert_to_string() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 9 LOC McCabe index: 7 number of parameters: 1 id: 339 unit: def read_checkpoint_version_from_shard_file() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 9 LOC McCabe index: 2 number of parameters: 1 id: 340 unit: def generate_slurm_script() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 341 unit: def parse_input_paths() file: scripts/weka.py start line: 0 end line: 0 size: 9 LOC McCabe index: 8 number of parameters: 2 id: 342 unit: def generate_model_config() file: slurm_launcher.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 343 unit: def create_slurm_script() file: slurm_launcher.py start line: 0 end line: 0 size: 9 LOC McCabe index: 1 number of parameters: 0 id: 344 unit: def __init__() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 2 id: 345 unit: def _get_new_stream() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 2 id: 346 unit: def get_consumption_stats() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 347 unit: def _num_epochs() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 8 LOC McCabe index: 5 number of parameters: 4 id: 348 unit: def __init__() file: src/nanotron/nn/rotary.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 349 unit: def _get_default_args() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 1 id: 350 unit: def unflatten_varlen_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 3 id: 351 unit: def _get_default_args() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 1 id: 352 unit: def unflatten_varlen_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 3 id: 353 unit: def reset() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 1 id: 354 unit: def total_time() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 1 id: 355 unit: def init_random_states() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 2 id: 356 unit: def log_throughput() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 357 unit: def state_dict() file: src/nanotron/optim/named_optimizer.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 1 id: 358 unit: def __init__() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 4 id: 359 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 1 id: 360 unit: def __post_init__() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 1 id: 361 unit: def get_input_output_pp_ranks() file: src/nanotron/parallel/pipeline_parallel/utils.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 362 unit: def _send_data_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 4 id: 363 unit: def _set_metadata() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 3 id: 364 unit: def column_linear() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 365 unit: def __init__() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 366 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 4 id: 367 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 4 id: 368 unit: def check_model_has_grad() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 2 id: 369 unit: def _combine_expert_outputs() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 4 id: 370 unit: def check_optim_state_in_sync() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 8 LOC McCabe index: 4 number of parameters: 2 id: 371 unit: def _inner() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 4 id: 372 unit: def __new__() file: src/nanotron/fp8/parameter.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 4 id: 373 unit: def set_random_state() file: src/nanotron/random.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 1 id: 374 unit: def _parametrize_row_linear() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 8 LOC McCabe index: 3 number of parameters: 3 id: 375 unit: def __init__() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 4 id: 376 unit: def load_random_states() file: src/nanotron/serialize/random.py start line: 0 end line: 0 size: 8 LOC McCabe index: 2 number of parameters: 2 id: 377 unit: def estimate_num_params() file: scripts/scaling_benchmarks.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 378 unit: def get_dataloader_from_data_stage() file: run_train.py start line: 0 end line: 0 size: 8 LOC McCabe index: 1 number of parameters: 0 id: 379 unit: def _format_paths() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 7 LOC McCabe index: 4 number of parameters: 3 id: 380 unit: def clm_process() file: src/nanotron/data/processing.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 381 unit: def compile_helper() file: src/nanotron/data/nemo_dataset/dataset_utils.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 0 id: 382 unit: def update_consumption_metrics() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 7 LOC McCabe index: 4 number of parameters: 4 id: 383 unit: def get_ds_sample_idx() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 2 id: 384 unit: def get() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 385 unit: def merge_file_() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 2 id: 386 unit: def wait() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 387 unit: def wait() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 388 unit: def __init__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 1 id: 389 unit: def document_mask_direct() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 4 id: 390 unit: def sliding_window_causal() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 391 unit: def get_block_mask_from_type() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 392 unit: def __init__() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 5 id: 393 unit: def clear_all_tbi_logs() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 7 LOC McCabe index: 4 number of parameters: 1 id: 394 unit: def add_scalars_from_list() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 3 id: 395 unit: def set_ranks_logging_level() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 7 LOC McCabe index: 4 number of parameters: 2 id: 396 unit: def _process_value_according_to_param_policy() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 397 unit: def no_sync() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 1 id: 398 unit: def zero_grad() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 7 LOC McCabe index: 5 number of parameters: 1 id: 399 unit: def micro_batcher() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 400 unit: def get_local_store() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 401 unit: def main_rank_first() file: src/nanotron/utils.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 402 unit: def local_ranks_zero_first() file: src/nanotron/utils.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 403 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 404 unit: def save_as_yaml() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 3 id: 405 unit: def cast_str_to_pipeline_engine() file: src/nanotron/config/utils_config.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 406 unit: def __post_init__() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 1 id: 407 unit: def validation_step() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 408 unit: def train_batch_iter() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 409 unit: def train_batch_iter() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 410 unit: def train_batch_iter() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 411 unit: def forward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 412 unit: def send_to_pipeline_state_buffer() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 4 id: 413 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 414 unit: def reshape() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 7 LOC McCabe index: 4 number of parameters: 2 id: 415 unit: def forward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 416 unit: def row_linear() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 417 unit: def get() file: src/nanotron/parallel/utils.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 4 id: 418 unit: def get_global_rank() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 419 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 13 id: 420 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 13 id: 421 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 422 unit: def register_empty_parameter() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 3 id: 423 unit: def register_empty_buffer() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 4 id: 424 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 425 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 426 unit: def start_downloading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 1 id: 427 unit: def update_output() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 7 LOC McCabe index: 3 number of parameters: 0 id: 428 unit: def forward() file: src/nanotron/fp8/linear.py start line: 0 end line: 0 size: 7 LOC McCabe index: 8 number of parameters: 3 id: 429 unit: def get_current_random_state() file: src/nanotron/random.py start line: 0 end line: 0 size: 7 LOC McCabe index: 2 number of parameters: 0 id: 430 unit: def from_str_dict() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 3 id: 431 unit: def load_optimizer() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 9 id: 432 unit: def get_checkpoint_paths_list() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 7 LOC McCabe index: 1 number of parameters: 0 id: 433 unit: def _get_s3_object() file: src/nanotron/data/s3_utils.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 2 id: 434 unit: def deallocate_indexed_dataset_memory() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 1 id: 435 unit: def __init__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 436 unit: def update_out_and_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 437 unit: def flatten_varlen_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 2 id: 438 unit: def send_recv_kv() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 439 unit: def update_out_and_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 440 unit: def flatten_varlen_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 2 id: 441 unit: def send_recv_kv() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 442 unit: def __init__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 2 id: 443 unit: def __init__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 3 id: 444 unit: def __init__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 1 id: 445 unit: def __init__() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 446 unit: def forward() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 2 id: 447 unit: def circular_rank_left() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 4 id: 448 unit: def circular_rank_right() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 4 id: 449 unit: def ring_pass() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 450 unit: def all_gather_same_dim() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 1 id: 451 unit: def is_flash_attn_greater_or_equal_2_10() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 0 id: 452 unit: def score_mod() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 5 id: 453 unit: def document_mask_from_positions() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 4 id: 454 unit: def _offsets_to_doc_ids_tensor() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 455 unit: def forward() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 2 id: 456 unit: def filter() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 6 LOC McCabe index: 5 number of parameters: 2 id: 457 unit: def add_scalar() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 5 id: 458 unit: def __call__() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 7 id: 459 unit: def get_custom_weight_decay_for_named_parameters() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 460 unit: def inherit_from() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 2 id: 461 unit: def clip_grad_norm() file: src/nanotron/optim/clip_grads.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 462 unit: def __torch_dispatch__() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 4 id: 463 unit: def merge_dp_shard_in_zero1_optimizer() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 464 unit: def micro_splitter() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 465 unit: def get_config_from_file() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 466 unit: def __init__() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 467 unit: def train() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 468 unit: def train_step_logs() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 469 unit: def forward() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 470 unit: def forward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 471 unit: def backward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 2 id: 472 unit: def recv_from_pipeline_state_buffer() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 3 id: 473 unit: def _reset() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 474 unit: def add_recv() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 475 unit: def _send_recv_first_metadata() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 6 LOC McCabe index: 4 number of parameters: 1 id: 476 unit: def mark_as_tied() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 477 unit: def sanity_check() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 1 id: 478 unit: def forward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 5 id: 479 unit: def sharded_cross_entropy() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 480 unit: def create_tied_parameter() file: src/nanotron/parallel/tied_parameters.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 481 unit: def sync_gradients_across_dp() file: src/nanotron/parallel/data_parallel/utils.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 482 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 4 id: 483 unit: def apply_rotary_pos_emb() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 6 id: 484 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 485 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 486 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 487 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 488 unit: def reset_parameters() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 1 id: 489 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 490 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 491 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 492 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 493 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 494 unit: def build_model() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 6 id: 495 unit: def patch_tensor_constructor() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 496 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 497 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 498 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 2 id: 499 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 500 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 2 id: 501 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 502 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 503 unit: def before_tbi_sanity_checks() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 504 unit: def before_optim_step_sanity_checks() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 505 unit: def reduce_scatter_tensor() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 506 unit: def reduce_scatter_coalesced() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 507 unit: def convert_tensor_from_fp8() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 3 id: 508 unit: def is_fp8_available() file: src/nanotron/fp8/utils.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 0 id: 509 unit: def fp8_matmul_kernel() file: src/nanotron/fp8/kernel.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 510 unit: def set_random_seed() file: src/nanotron/random.py start line: 0 end line: 0 size: 6 LOC McCabe index: 2 number of parameters: 1 id: 511 unit: def _parametrize_column_linear() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 3 id: 512 unit: def _parametrize_layer_norm() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 3 id: 513 unit: def _parametrize_layer_norm() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 3 id: 514 unit: def consumed_tokens_per_dataset_folder_total() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 6 LOC McCabe index: 3 number of parameters: 1 id: 515 unit: def to_str_dict() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 1 id: 516 unit: def get_path() file: src/nanotron/serialize/utils.py start line: 0 end line: 0 size: 6 LOC McCabe index: 1 number of parameters: 0 id: 517 unit: def __getitem__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 2 id: 518 unit: def normalize() file: src/nanotron/data/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 519 unit: def count_dataset_indexes() file: src/nanotron/data/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 520 unit: def __getitem__() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 521 unit: def get_dataloader_worker_init() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 522 unit: def __best_fitting_dtype() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 523 unit: def code() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 524 unit: def __del__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 525 unit: def exists() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 526 unit: def __init__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 3 id: 527 unit: def get_default_args() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 528 unit: def _update_out_and_lse() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 529 unit: def get_default_args() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 530 unit: def _update_out_and_lse() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 531 unit: def get_activation() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 532 unit: def routing() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 533 unit: def get_attention_mod_from_type() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 534 unit: def validate_attention_args() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 535 unit: def emit() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 536 unit: def __new__() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 537 unit: def __init__() file: src/nanotron/eval/one_job_runner.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 3 id: 538 unit: def run_slurm_one_job() file: src/nanotron/eval/one_job_runner.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 539 unit: def get_custom_lr_for_named_parameters() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 540 unit: def init_optimizer_and_grad_accumulator() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 541 unit: def state_dict_additional_keys() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 542 unit: def get_base_optimizer() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 543 unit: def __init__() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 544 unit: def state_dict() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 545 unit: def __init__() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 546 unit: def _get_grad() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 547 unit: def find_optim_index_from_param_name() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 4 id: 548 unit: def backward() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 549 unit: def step() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 550 unit: def load_state_dict() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 3 id: 551 unit: def chunks() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 552 unit: def all_gather_batches() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 3 id: 553 unit: def get_untyped_storage() file: src/nanotron/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 554 unit: def tensor_from_untyped_storage() file: src/nanotron/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 555 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 556 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 557 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 558 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 559 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 560 unit: def __post_init__() file: src/nanotron/config/lighteval_config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 561 unit: def cast_str_to_torch_dtype() file: src/nanotron/config/utils_config.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 562 unit: def get_size() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 1 id: 563 unit: def _mark_tied_parameters() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 564 unit: def _get_bwd_context() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 565 unit: def validate_batch_iter() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 566 unit: def forward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 5 id: 567 unit: def build_and_set_rank() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 568 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 3 id: 569 unit: def recv_tensors() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 4 id: 570 unit: def from_str() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 571 unit: def mark_as_sharded() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 572 unit: def forward() file: src/nanotron/parallel/tensor_parallel/functional.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 4 id: 573 unit: def forward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 3 id: 574 unit: def create_sharded_parameter() file: src/nanotron/parallel/sharded_parameters.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 575 unit: def mark_all_parameters_in_module_as_sharded() file: src/nanotron/parallel/sharded_parameters.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 3 id: 576 unit: def tie_parameters() file: src/nanotron/parallel/tied_parameters.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 577 unit: def destroy() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 578 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 579 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 580 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 581 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 582 unit: def get_embeddings_lm_head_tied_names() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 583 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 584 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 585 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 586 unit: def get_embeddings_lm_head_tied_names() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 587 unit: def to() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 3 id: 588 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 589 unit: def _compute_router_probabilities() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 590 unit: def _core_forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 591 unit: def _checkpointed_forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 592 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 593 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 594 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 595 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 596 unit: def get_embeddings_lm_head_tied_names() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 597 unit: def _release_lock() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 5 LOC McCabe index: 4 number of parameters: 1 id: 598 unit: def assert_tensor_synced_across_pg() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 599 unit: def after_tbi_sanity_checks() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 600 unit: def after_optim_step_sanity_checks() file: src/nanotron/sanity_checks.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 601 unit: def update_output() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 0 id: 602 unit: def all_reduce_coalesced() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 603 unit: def all_gather_coalesced() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 604 unit: def get_global_rank() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 605 unit: def get_rank() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 606 unit: def check_type() file: src/nanotron/random.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 2 id: 607 unit: def __setitem__() file: src/nanotron/random.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 3 id: 608 unit: def _parametrize_router() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 5 LOC McCabe index: 3 number of parameters: 3 id: 609 unit: def to_dict() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 4 id: 610 unit: def optimizer_filename() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 611 unit: def lr_scheduler_filename() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 2 id: 612 unit: def save_lr_scheduler() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 613 unit: def load_lr_scheduler() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 614 unit: def extract_tp_pp_rank_from_shard_path() file: src/nanotron/serialize/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 1 id: 615 unit: def merge_and_shard_tp_tensors() file: src/nanotron/serialize/utils.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 616 unit: def load_sharded_param_latest() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 617 unit: def load_weights() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 618 unit: def main() file: scripts/fix_checkpoint_bad_naming.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 0 id: 619 unit: def launch_slurm_job() file: slurm_launcher.py start line: 0 end line: 0 size: 5 LOC McCabe index: 1 number of parameters: 2 id: 620 unit: def tail_output_file() file: slurm_launcher.py start line: 0 end line: 0 size: 5 LOC McCabe index: 2 number of parameters: 1 id: 621 unit: def _get_s3_path_components() file: src/nanotron/data/s3_utils.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 622 unit: def __iter__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 1 id: 623 unit: def __getitem__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 624 unit: def get_start_end_idx() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 625 unit: def _tokenize_and_group_texts() file: src/nanotron/data/processing.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 1 id: 626 unit: def get_datasets() file: src/nanotron/data/processing.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 627 unit: def sanity_check_dataloader() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 628 unit: def __getitem__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 629 unit: def _warmup_mmap_file() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 630 unit: def __del__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 631 unit: def add_item() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 632 unit: def finalize() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 633 unit: def commit() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 634 unit: def wait() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 635 unit: def commit() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 636 unit: def wait() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 637 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 638 unit: def __getitem__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 639 unit: def forward() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 640 unit: def _dispatch_tokens() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 641 unit: def pad_dim_to() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 642 unit: def gather_sizes() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 643 unit: def backward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 644 unit: def create_document_mask_func() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 645 unit: def create_attention_mask() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 646 unit: def doc_causal_mask() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 647 unit: def sliding_window_causal_mask_func() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 5 id: 648 unit: def __init__() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 649 unit: def _configure_library_root_logger() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 650 unit: def average_time() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 651 unit: def items() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 652 unit: def normalize_s3_path() file: src/nanotron/eval/one_job_runner.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 653 unit: def get_formatted_value() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 654 unit: def __init__() file: src/nanotron/optim/named_optimizer.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 655 unit: def step() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 656 unit: def load_state_dict() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 657 unit: def step() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 4 LOC McCabe index: 4 number of parameters: 3 id: 658 unit: def __init__() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 659 unit: def assign_param_offsets() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 6 id: 660 unit: def __call__() file: src/nanotron/utils.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 661 unit: def post_train_step() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 662 unit: def _init_model() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 663 unit: def register_send_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 4 id: 664 unit: def register_recv_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 3 id: 665 unit: def _get_fwd_context() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 666 unit: def backward() file: src/nanotron/parallel/pipeline_parallel/functional.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 667 unit: def send_tensors() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 4 id: 668 unit: def __str__() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 669 unit: def get_full_name_from_model() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 670 unit: def get_tied_info() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 671 unit: def is_tied() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 672 unit: def get_sharded_info() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 673 unit: def is_sharded() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 674 unit: def backward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 675 unit: def create_sharded_parameter_from_config() file: src/nanotron/parallel/sharded_parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 676 unit: def sync_tied_weights_gradients() file: src/nanotron/parallel/tied_parameters.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 677 unit: def set_device() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 1 id: 678 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 679 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 680 unit: def rotate_half() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 681 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 682 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 683 unit: def _core_forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 684 unit: def _checkpointed_forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 685 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 686 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 687 unit: def forward_with_hidden_states() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 688 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 689 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 5 id: 690 unit: def dropout_add() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 4 id: 691 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 692 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 3 id: 693 unit: def forward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 694 unit: def before_tbi_sanity_checks() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 1 id: 695 unit: def should_ignore_init_on_device_and_dtype() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 0 id: 696 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 3 id: 697 unit: def forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 698 unit: def _warning() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 699 unit: def _info() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 700 unit: def wait_for_completion() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 1 id: 701 unit: def get_filesystem_and_path() file: src/nanotron/s3_checkpoints/fsspec.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 702 unit: def convert_tensor_to_fp8() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 703 unit: def __init__() file: src/nanotron/random.py start line: 0 end line: 0 size: 4 LOC McCabe index: 3 number of parameters: 2 id: 704 unit: def __eq__() file: src/nanotron/random.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 2 id: 705 unit: def parametrize() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 706 unit: def _parametrize_embedding() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 707 unit: def _parametrize_embedding() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 708 unit: def get_lr() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 4 LOC McCabe index: 2 number of parameters: 3 id: 709 unit: def save_optimizer() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 710 unit: def read_checkpoint_version_from_meta() file: src/nanotron/serialize/weights.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 2 id: 711 unit: def save_random_states() file: src/nanotron/serialize/random.py start line: 0 end line: 0 size: 4 LOC McCabe index: 1 number of parameters: 0 id: 712 unit: def collect_all_metrics() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 713 unit: def __init__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 4 id: 714 unit: def __getitem__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 715 unit: def __getitem__() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 716 unit: def __init__() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 717 unit: def all_gather() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 718 unit: def __init__() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 719 unit: def all_gather() file: src/nanotron/nn/ring_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 720 unit: def __init__() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 721 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 4 id: 722 unit: def forward() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 723 unit: def _combine_expert_outputs() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 4 id: 724 unit: def __init__() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 725 unit: def get_ring_flash_attn_cuda() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 726 unit: def create_softcapped_causal_score_mod() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 727 unit: def create_block_mask_cached() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 6 id: 728 unit: def __init__() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 729 unit: def _reset_library_root_logger() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 730 unit: def __enter__() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 731 unit: def __exit__() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 4 id: 732 unit: def reset_all() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 733 unit: def reset() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 2 id: 734 unit: def merge_named_param_groups() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 735 unit: def is_last_stage() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 736 unit: def zero_grad() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 737 unit: def get_sliced_flat_tensor() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 738 unit: def get_sliced_tensor() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 739 unit: def get_numel_of_unsharded_dp_param() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 3 LOC McCabe index: 3 number of parameters: 1 id: 740 unit: def assign_shard_to_buffer() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 741 unit: def get_fp32_accum_hook() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 742 unit: def flush() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 743 unit: def _attach_store() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 744 unit: def __init__() file: src/nanotron/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 745 unit: def __enter__() file: src/nanotron/utils.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 746 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 747 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 748 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 749 unit: def __post_init__() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 750 unit: def create_empty() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 751 unit: def get_yaml() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 752 unit: def load_from_yaml() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 753 unit: def __post_init__() file: src/nanotron/config/lighteval_config.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 754 unit: def post_training() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 755 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 756 unit: def get_min_max_rank() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 3 LOC McCabe index: 3 number of parameters: 1 id: 757 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 758 unit: def forward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 759 unit: def backward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 760 unit: def backward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 761 unit: def get_local_ranks() file: src/nanotron/parallel/context.py start line: 0 end line: 0 size: 3 LOC McCabe index: 3 number of parameters: 2 id: 762 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 763 unit: def forward() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 764 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 765 unit: def __init__() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 766 unit: def rotate_half() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 767 unit: def shape() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 768 unit: def __init__() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 769 unit: def __new__() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 770 unit: def init_on_device_and_dtype() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 771 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 2 id: 772 unit: def __init__() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 3 id: 773 unit: def is_previous_save_finished() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 1 id: 774 unit: def fs_open() file: src/nanotron/s3_checkpoints/fsspec.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 775 unit: def fs_copy() file: src/nanotron/s3_checkpoints/fsspec.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 776 unit: def get_synced_random_state() file: src/nanotron/random.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 0 id: 777 unit: def __init__() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 3 LOC McCabe index: 1 number of parameters: 4 id: 778 unit: def __post_init__() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 3 LOC McCabe index: 2 number of parameters: 1 id: 779 unit: def compute_tensor_norm() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 780 unit: def compute_zero_fraction() file: src/nanotron/metrics_logging.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 781 unit: def __len__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 782 unit: def __len__() file: src/nanotron/data/tokenized_bytes.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 783 unit: def _get_s3_file_list() file: src/nanotron/data/s3_utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 784 unit: def pack_sft_sequences() file: src/nanotron/data/sft_processing.py start line: 0 end line: 0 size: 2 LOC McCabe index: 4 number of parameters: 3 id: 785 unit: def total_length() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 786 unit: def __len__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 787 unit: def __init__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 788 unit: def __len__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 789 unit: def __iter__() file: src/nanotron/data/samplers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 790 unit: def __len__() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 791 unit: def build_nanoset_index_helper() file: src/nanotron/data/nanoset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 792 unit: def set_tensor_pointers() file: src/nanotron/data/dataloader.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 793 unit: def get_datasets_weights_and_num_samples() file: src/nanotron/data/nemo_dataset/dataset_utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 794 unit: def __len__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 795 unit: def __len__() file: src/nanotron/data/nemo_dataset/blendable_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 796 unit: def make_builder() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 6 id: 797 unit: def index_file_path() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 798 unit: def data_file_path() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 799 unit: def dtype() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 800 unit: def sizes() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 801 unit: def doc_idx() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 802 unit: def __getitem__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 803 unit: def __len__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 804 unit: def __getstate__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 805 unit: def __len__() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 806 unit: def sizes() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 807 unit: def doc_idx() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 808 unit: def get_doc_idx() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 809 unit: def set_doc_idx() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 810 unit: def supports_prefetch() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 811 unit: def deallocate_indexed_dataset_memory() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 812 unit: def end_document() file: src/nanotron/data/nemo_dataset/indexed_dataset.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 813 unit: def __len__() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 814 unit: def _num_tokens() file: src/nanotron/data/nemo_dataset/__init__.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 815 unit: def llama3_flash_attn_prepare_cu_seqlens() file: src/nanotron/nn/llama3_ring_attention.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 816 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 817 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 818 unit: def _gelu_python() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 819 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 820 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 821 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 822 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 823 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 824 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 825 unit: def _mish_python() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 826 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 827 unit: def forward() file: src/nanotron/nn/activations.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 828 unit: def __init__() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 829 unit: def gating() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 830 unit: def _checkpointed_forward() file: src/nanotron/nn/moe.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 831 unit: def exists() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 832 unit: def default() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 833 unit: def divisible_by() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 834 unit: def exists() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 835 unit: def default() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 836 unit: def cast_tuple() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 837 unit: def circular_index_left() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 838 unit: def circular_index_right() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 839 unit: def null_ring_pass() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 5 id: 840 unit: def exists() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 841 unit: def default() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 842 unit: def divisible_by() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 843 unit: def get_rank() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 0 id: 844 unit: def get_world_size() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 0 id: 845 unit: def is_distributed() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 0 id: 846 unit: def has_only_one_value() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 847 unit: def forward() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 848 unit: def exists() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 849 unit: def default() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 2 id: 850 unit: def is_contiguous() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 851 unit: def grid() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 852 unit: def init_to_zero() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 853 unit: def grid() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 854 unit: def grid() file: src/nanotron/nn/ring_attention_lucidrain.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 855 unit: def is_torch_flex_attn_available() file: src/nanotron/nn/attention.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 0 id: 856 unit: def causal_mask_func() file: src/nanotron/nn/flex_attention.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 857 unit: def forward() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 858 unit: def reset_parameters() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 859 unit: def forward() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 860 unit: def extra_repr() file: src/nanotron/nn/layer_norm.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 861 unit: def tbi_logger() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 2 LOC McCabe index: 4 number of parameters: 3 id: 862 unit: def _get_internal_logs() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 863 unit: def _clear_internal_logs() file: src/nanotron/logging/logmixin.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 864 unit: def get_library_root_logger() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 865 unit: def get_verbosity() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 866 unit: def warn_once() file: src/nanotron/logging/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 867 unit: def enable() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 868 unit: def disable() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 869 unit: def is_enabled() file: src/nanotron/logging/timers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 870 unit: def test_all_pair_to_pair() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 871 unit: def compute_remain_train_steps_of_a_data_stage_from_ckp() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 872 unit: def is_resume_from_training() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 873 unit: def get_consumed_train_samples_of_a_data_stage_from_ckp() file: src/nanotron/helpers.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 874 unit: def __getstate__() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 875 unit: def __setstate__() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 876 unit: def __repr__() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 877 unit: def zero_grad() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 878 unit: def state_dict() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 879 unit: def load_state_dict() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 880 unit: def step() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 881 unit: def param_groups() file: src/nanotron/optim/inherit_from_other_optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 882 unit: def __getstate__() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 883 unit: def __setstate__() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 884 unit: def __repr__() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 885 unit: def zero_grad() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 886 unit: def state_dict_additional_keys() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 887 unit: def state_dict() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 888 unit: def load_state_dict() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 889 unit: def step() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 890 unit: def inherit_from() file: src/nanotron/optim/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 891 unit: def state_dict_additional_keys() file: src/nanotron/optim/optimizer_from_gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 892 unit: def _del_grad() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 893 unit: def data_ptr() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 894 unit: def extract_parallel_ranks_from_shard_path() file: src/nanotron/optim/zero.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 895 unit: def __init__() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 896 unit: def backward() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 897 unit: def step() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 898 unit: def sync_gradients_across_dp() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 899 unit: def zero_grad() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 900 unit: def get_parameter_for_optimizer() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 901 unit: def get_grad_buffer() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 902 unit: def state_dict() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 903 unit: def load_state_dict() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 904 unit: def build_grad_buffers() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 905 unit: def get_parameter_for_optimizer() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 906 unit: def get_grad_buffer() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 907 unit: def state_dict() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 908 unit: def __post_init__() file: src/nanotron/optim/gradient_accumulator.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 909 unit: def broadcast_tensors() file: src/nanotron/generation/decode.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 910 unit: def __call__() file: src/nanotron/generation/sampler.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 911 unit: def __init__() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 912 unit: def _detach_store() file: src/nanotron/generation/generate_store.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 913 unit: def __exit__() file: src/nanotron/utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 914 unit: def __repr__() file: src/nanotron/utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 915 unit: def global_batch_size() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 916 unit: def global_batch_size_in_tokens() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 917 unit: def as_dict() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 918 unit: def get_config_from_dict() file: src/nanotron/config/config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 919 unit: def __post_init__() file: src/nanotron/config/lighteval_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 920 unit: def __post_init__() file: src/nanotron/config/lighteval_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 921 unit: def __post_init__() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 922 unit: def is_using_mup() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 923 unit: def is_using_mup() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 924 unit: def is_moe_model() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 925 unit: def n_embed() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 926 unit: def n_head() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 927 unit: def n_layer() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 928 unit: def n_positions() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 929 unit: def n_inner() file: src/nanotron/config/models_config.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 930 unit: def pre_init() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 931 unit: def training_step() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 932 unit: def setup_log_writers() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 933 unit: def mark_tied_parameters() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 934 unit: def mark_unsharded_params_as_tied_across_tp() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 935 unit: def mark_unsharded_params_as_tied_across_expert() file: src/nanotron/trainer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 936 unit: def __call__() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 937 unit: def __call__() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 938 unit: def __call__() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 939 unit: def __call__() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 940 unit: def register_activation_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 941 unit: def register_send_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 942 unit: def register_recv_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 943 unit: def register_send_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 944 unit: def register_recv_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 945 unit: def run_communication() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 946 unit: def new_micro_batch_forward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 947 unit: def pop_last_activations_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 948 unit: def register_activation_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 949 unit: def register_send_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 950 unit: def register_recv_activation() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 951 unit: def register_send_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 952 unit: def register_recv_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 953 unit: def new_micro_batch_forward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 954 unit: def pop_last_activations_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 955 unit: def register_activation_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 956 unit: def register_send_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 957 unit: def register_recv_grad() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 958 unit: def new_micro_batch_forward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 959 unit: def pop_last_activations_requiring_backward() file: src/nanotron/parallel/pipeline_parallel/state.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 960 unit: def backward() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 961 unit: def __str__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 962 unit: def __format__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 963 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 964 unit: def __str__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 965 unit: def __init__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 966 unit: def __str__() file: src/nanotron/parallel/pipeline_parallel/engine.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 967 unit: def extra_repr() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 968 unit: def set_pipeline_state() file: src/nanotron/parallel/pipeline_parallel/block.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 969 unit: def to_second_metadata() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 970 unit: def _recv_second_metadata_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 971 unit: def _recv_data_p2p_op() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 972 unit: def irecv_tensors() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 973 unit: def __str__() file: src/nanotron/parallel/pipeline_parallel/p2p.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 974 unit: def slice_to_str() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 1 id: 975 unit: def str_to_slice() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 1 id: 976 unit: def tuple_to_str() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 3 number of parameters: 2 id: 977 unit: def tuple_from_str() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 978 unit: def get_full_name_from_module_id_to_prefix() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 979 unit: def is_tp_sharded() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 980 unit: def is_expert_sharded() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 981 unit: def is_dp_sharded() file: src/nanotron/parallel/parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 982 unit: def extra_repr() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 983 unit: def extra_repr() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 984 unit: def extra_repr() file: src/nanotron/parallel/tensor_parallel/nn.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 985 unit: def __format__() file: src/nanotron/parallel/tensor_parallel/enum.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 986 unit: def __str__() file: src/nanotron/parallel/tensor_parallel/enum.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 987 unit: def backward() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 988 unit: def differentiable_identity() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 989 unit: def differentiable_all_reduce_sum() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 990 unit: def differentiable_all_gather() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 991 unit: def differentiable_reduce_scatter_sum() file: src/nanotron/parallel/tensor_parallel/distributed_differentiable_primitives.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 992 unit: def __init__() file: src/nanotron/parallel/utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 993 unit: def get_tied_id_to_param() file: src/nanotron/parallel/tied_parameters.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 994 unit: def masked_mean() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 995 unit: def get_block_compute_costs() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 996 unit: def get_flops_per_sec() file: src/nanotron/models/llama.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 997 unit: def backward() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 998 unit: def dropout_add_fused_train() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 999 unit: def masked_mean() file: src/nanotron/models/starcoder2.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1000 unit: def init_model_randomly() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1001 unit: def tie_custom_params() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1002 unit: def get_embeddings_lm_head_tied_names() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1003 unit: def before_tbi_sanity_checks() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1004 unit: def after_tbi_sanity_checks() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1005 unit: def before_optim_step_sanity_checks() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1006 unit: def after_optim_step_sanity_checks() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1007 unit: def detach() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1008 unit: def type() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1009 unit: def float() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1010 unit: def double() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1011 unit: def half() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1012 unit: def long() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1013 unit: def int() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1014 unit: def short() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1015 unit: def char() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1016 unit: def byte() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1017 unit: def bool() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1018 unit: def bfloat16() file: src/nanotron/models/base.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1019 unit: def _checkpointed_forward() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1020 unit: def masked_mean() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1021 unit: def get_block_compute_costs() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1022 unit: def get_flops_per_sec() file: src/nanotron/models/qwen.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 4 id: 1023 unit: def __init__() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1024 unit: def poll() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1025 unit: def communicate() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1026 unit: def _start_uploading() file: src/nanotron/s3_checkpoints/s3_mover.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1027 unit: def check_path_is_local() file: src/nanotron/s3_checkpoints/fsspec.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1028 unit: def new_group() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1029 unit: def all_gather_into_tensor() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1030 unit: def get_global_ranks() file: src/nanotron/distributed.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 1031 unit: def __repr__() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1032 unit: def update_scaling_factor() file: src/nanotron/fp8/tensor.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1033 unit: def data() file: src/nanotron/fp8/parameter.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1034 unit: def data() file: src/nanotron/fp8/parameter.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1035 unit: def fp8_meta() file: src/nanotron/fp8/parameter.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1036 unit: def __repr__() file: src/nanotron/fp8/parameter.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1037 unit: def te_dtype() file: src/nanotron/fp8/meta.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1038 unit: def __post_init__() file: src/nanotron/fp8/meta.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 1 id: 1039 unit: def fp8_max() file: src/nanotron/fp8/meta.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1040 unit: def inverse_scale() file: src/nanotron/fp8/meta.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1041 unit: def __repr__() file: src/nanotron/fp8/meta.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1042 unit: def forward() file: src/nanotron/fp8/linear.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1043 unit: def __getitem__() file: src/nanotron/random.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1044 unit: def __iter__() file: src/nanotron/random.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1045 unit: def __len__() file: src/nanotron/random.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1046 unit: def __delitem__() file: src/nanotron/random.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1047 unit: def __init__() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 2 id: 1048 unit: def _compute_spectral_std() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1049 unit: def get_lr() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1050 unit: def get_lr() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1051 unit: def _get_global_lr() file: src/nanotron/scaling/parametrization.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1052 unit: def sanity_consumed_train_samples() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1053 unit: def consumed_tokens_all_datasets() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1054 unit: def current_stage() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 1 id: 1055 unit: def to_list() file: src/nanotron/serialize/metadata.py start line: 0 end line: 0 size: 2 LOC McCabe index: 2 number of parameters: 5 id: 1056 unit: def get_checkpoint_state_metadata() file: src/nanotron/serialize/optimizer.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 3 id: 1057 unit: def get_exp_tp_pp_rank_and_size_from() file: src/nanotron/serialize/utils.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0 id: 1058 unit: def get_dataloader() file: run_train.py start line: 0 end line: 0 size: 2 LOC McCabe index: 1 number of parameters: 0