in timm/models/mlp_mixer.py [0:0]
def _init_weights(module: nn.Module, name: str, head_bias: float = 0., flax: bool = False) -> None:
"""Mixer weight initialization (trying to match Flax defaults).
Args:
module: Module to initialize.
name: Module name.
head_bias: Bias value for head layer.
flax: Use Flax-style initialization.
"""
if isinstance(module, nn.Linear):
if name.startswith('head'):
nn.init.zeros_(module.weight)
nn.init.constant_(module.bias, head_bias)
else:
if flax:
# Flax defaults
lecun_normal_(module.weight)
if module.bias is not None:
nn.init.zeros_(module.bias)
else:
# like MLP init in vit (my original init)
nn.init.xavier_uniform_(module.weight)
if module.bias is not None:
if 'mlp' in name:
nn.init.normal_(module.bias, std=1e-6)
else:
nn.init.zeros_(module.bias)
elif isinstance(module, nn.Conv2d):
lecun_normal_(module.weight)
if module.bias is not None:
nn.init.zeros_(module.bias)
elif isinstance(module, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(module.weight)
nn.init.zeros_(module.bias)
elif hasattr(module, 'init_weights'):
# NOTE if a parent module contains init_weights method, it can override the init of the
# child modules as this will be called in depth-first order.
module.init_weights()