in src/nanotron/metrics_logging.py [0:0]
def collect_parameter_metrics(self, model: torch.nn.Module) -> Dict[str, torch.Tensor]:
    """Collect detailed metrics for model parameters by layer and component.

    Walks every (component, subcomponent) entry of ``self.MODEL_COMPONENTS``
    across all hidden layers, computing tensor statistics for each parameter
    found — and for its gradient, when one is present — then does the same
    for the final layer-norm weight. Metric keys follow the pattern
    ``layer_{i}/{component}/{subcomponent}/[grad/]{stat}``.
    """
    metrics: Dict[str, torch.Tensor] = {}
    num_layers = self.config.model.model_config.num_hidden_layers
    paths = self._format_paths(self.MODEL_COMPONENTS, num_layers)

    # Per-layer parameter (and gradient) statistics.
    for layer in range(num_layers):
        prefix = f"layer_{layer}"
        for component, parts in self.MODEL_COMPONENTS.items():
            for part in parts:
                param = get_attribute_by_path(model, paths[component][part][layer])
                if param is None:
                    # Component absent from this model variant — skip quietly.
                    continue
                for stat, value in compute_tensor_stats(param).items():
                    metrics[f"{prefix}/{component}/{part}/{stat}"] = value
                # Gradient stats only when a gradient has actually been populated.
                grad = param.grad if hasattr(param, "grad") else None
                if grad is not None:
                    for stat, value in compute_tensor_stats(grad.detach()).items():
                        metrics[f"{prefix}/{component}/{part}/grad/{stat}"] = value

    # Final layer norm sits outside the per-layer component table.
    final_ln = get_attribute_by_path(model, "model.final_layer_norm.pp_block.weight")
    if final_ln is not None:
        for stat, value in compute_tensor_stats(final_ln).items():
            metrics[f"final_layernorm/{stat}"] = value

    return metrics