in src/transformers/models/vit/vision_transformer.py [0:0]
def _init_vit_weights(m, n: str = '', head_bias: float = 0., jax_impl: bool = False):
""" ViT weight initialization
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L320
* When called without n, head_bias, jax_impl args it will behave exactly the same
as my original init for compatibility with prev hparam / downstream use cases (ie DeiT).
* When called w/ valid n (module name) and jax_impl=True, will (hopefully) match JAX impl
"""
if isinstance(m, nn.Linear):
if n.startswith('head'):
nn.init.zeros_(m.weight)
nn.init.constant_(m.bias, head_bias)
elif n.startswith('pre_logits'):
lecun_normal_(m.weight)
nn.init.zeros_(m.bias)
else:
if jax_impl:
nn.init.xavier_uniform_(m.weight)
if m.bias is not None:
if 'mlp' in n:
nn.init.normal_(m.bias, std=1e-6)
else:
nn.init.zeros_(m.bias)
else:
trunc_normal_(m.weight, std=.02)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif jax_impl and isinstance(m, nn.Conv2d):
# NOTE conv was left to pytorch default in my original init
lecun_normal_(m.weight)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
nn.init.zeros_(m.bias)
nn.init.ones_(m.weight)