in src/nanotron/models/qwen.py [0:0]
def init_model_randomly(self, config: Config):
    """Initialize model parameters randomly.

    Selects a parametrizator class from the configured init method
    (standard random init vs. spectral muP), then walks every parameter
    of the model exactly once. Tied parameters are resolved to a single
    canonical full name so that shared weights are initialized only once.
    """
    init_method = config.model.init_method
    if isinstance(init_method, RandomInit):
        parametrizator_cls = StandardParametrizator
    elif isinstance(init_method, SpectralMupInit):
        parametrizator_cls = SpectralMupParametrizator
    else:
        raise ValueError(f"Unknown init method {init_method}")

    parametrizator = parametrizator_cls(config=config)
    log_rank(
        f"Parametrizing model parameters using {parametrizator.__class__.__name__}",
        logger=logger,
        level=logging.INFO,
        rank=0,
    )

    model = self
    seen = set()

    # Map each submodule's id() to its dotted prefix; tied parameters use
    # this to recover one canonical full name regardless of which alias
    # named_parameters() yields. The root module maps to the empty prefix.
    prefix_by_module_id = {id(mod): f"{name}." for name, mod in model.named_modules()}
    prefix_by_module_id[id(model)] = ""

    for qualified_name, param in model.named_parameters():
        assert isinstance(param, NanotronParameter)
        module_name, leaf_name = qualified_name.rsplit(".", 1)

        if param.is_tied:
            tied_info = param.get_tied_info()
            canonical_name = tied_info.get_full_name_from_module_id_to_prefix(
                module_id_to_prefix=prefix_by_module_id
            )
        else:
            canonical_name = f"{module_name}.{leaf_name}"

        # Tied parameters share storage — skip aliases already initialized.
        if canonical_name in seen:
            continue

        owner_module = model.get_submodule(module_name)
        parametrizator.parametrize(leaf_name, owner_module)

        assert canonical_name not in seen
        seen.add(canonical_name)

    # Sanity check: the set of names we initialized must cover exactly the
    # canonical names of all parameters in the model.
    assert seen == {
        param.get_tied_info().get_full_name_from_module_id_to_prefix(module_id_to_prefix=prefix_by_module_id)
        if param.is_tied
        else name
        for name, param in model.named_parameters()
    }, f"Somehow the initialized set of parameters don't match:\n - Expected: { {name for name, _ in model.named_parameters()} }\n - Got: {seen}"