// Undirected graph whose nodes are repository file paths wrapped in square
// brackets. Every edge carries the same label (" 1 "), pen width and
// semi-transparent colour.
// NOTE(review): the label presumably encodes a coupling/co-change count
// between the two files -- confirm against the tool that generated this file.
// Statement order is kept exactly as generated, since the order in which
// nodes first appear can influence the layout.
graph G {
    // Graph-wide attributes.
    compound="true";
    rankdir="TB";
    bgcolor="white";
    fontname="Tahoma";

    // Defaults applied to every node.
    node [
        fixedsize="false",
        fontname="Tahoma",
        color="white",
        fillcolor="deepskyblue2",
        fontcolor="black",
        shape="box",
        style="filled",
        penwidth="1.0"
    ];

    // Defaults applied to every edge (each edge below overrides color and
    // penwidth explicitly).
    edge [
        fontname="Arial",
        color="#00688b",
        fontcolor="black",
        fontsize="12",
        arrowsize="0.5",
        penwidth="1.0"
    ];

    // Edges, one per line, in the original order.
    "[src/nanotron/nn/moe.py]" -- "[src/nanotron/data/nemo_dataset/blendable_dataset.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/optim/gradient_accumulator.py]" -- "[src/nanotron/helpers.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/llama3_ring_attention.py]" -- "[src/nanotron/helpers.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/moe.py]" -- "[run_train.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/models/base.py]" -- "[src/nanotron/config/parallelism_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/helpers.py]" -- "[src/nanotron/eval/one_job_runner.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/timers.py]" -- "[src/nanotron/logging/logmixin.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/__init__.py]" -- "[src/nanotron/data/nemo_dataset/blendable_dataset.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/weights.py]" -- "[src/nanotron/config/lighteval_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/main.py]" -- "[src/nanotron/data/clm_collator.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/main.py]" -- "[src/nanotron/config/parallelism_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/base.py]" -- "[src/nanotron/config/parallelism_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/llama3_ring_attention.py]" -- "[src/nanotron/data/nemo_dataset/blendable_dataset.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/eval/upload_to_wandb.py]" -- "[src/nanotron/data/tokenized_bytes.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/timers.py]" -- "[src/nanotron/data/clm_collator.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/parallel/context.py]" -- "[src/nanotron/logging/timers.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/weights.py]" -- "[src/nanotron/eval/one_job_runner.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/parallel/context.py]" -- "[src/nanotron/logging/logmixin.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/config/parallelism_config.py]" -- "[src/nanotron/config/models_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/logmixin.py]" -- "[src/nanotron/data/tokenized_bytes.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/moe.py]" -- "[src/nanotron/logging/base.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/timers.py]" -- "[pyproject.toml]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/attention.py]" -- "[src/nanotron/logging/logmixin.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/optim/gradient_accumulator.py]" -- "[src/nanotron/models/base.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/moe.py]" -- "[src/nanotron/logging/timers.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/optimizer.py]" -- "[src/nanotron/logging/timers.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/helpers.py]" -- "[src/nanotron/config/lighteval_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/scaling/parametrization.py]" -- "[src/nanotron/nn/llama3_ring_attention.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/weights.py]" -- "[src/nanotron/constants.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/attention.py]" -- "[src/nanotron/data/nemo_dataset/blendable_dataset.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/rotary.py]" -- "[src/nanotron/config/lighteval_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/logging/__init__.py]" -- "[pyproject.toml]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/scaling/parametrization.py]" -- "[src/nanotron/models/base.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/main.py]" -- "[pyproject.toml]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/moe.py]" -- "[src/nanotron/eval/upload_to_wandb.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/attention.py]" -- "[src/nanotron/config/config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/weights.py]" -- "[src/nanotron/models/qwen.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/eval/upload_to_wandb.py]" -- "[src/nanotron/constants.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/data/clm_collator.py]" -- "[src/nanotron/config/config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/data/clm_collator.py]" -- "[src/nanotron/config/parallelism_config.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/nn/moe.py]" -- "[src/nanotron/constants.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/optimizer.py]" -- "[src/nanotron/data/tokenized_bytes.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/sanity_checks.py]" -- "[src/nanotron/data/tokenized_bytes.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/sanity_checks.py]" -- "[src/nanotron/eval/upload_to_wandb.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/config/parallelism_config.py]" -- "[run_train.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/metadata.py]" -- "[src/nanotron/parallel/context.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/trainer.py]" -- "[src/nanotron/parallel/context.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/parallel/context.py]" -- "[src/nanotron/data/clm_collator.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/serialize/weights.py]" -- "[src/nanotron/parallel/context.py]" [label=" 1 ", penwidth="1", color="#00688b5E"];
    "[src/nanotron/data/clm_collator.py]" -- "[pyproject.toml]" [label=" 1 ", penwidth="1", color="#00688b5E"];
}