in model/train.py [0:0]
def weights_init(m):
    """Dispatch per-module initialization based on the module's class name."""
    classname = m.__class__.__name__
    if classname.find("Linear") != -1:
        if hasattr(m, "weight") and m.weight is not None:
            init_weight(m.weight)
        if hasattr(m, "bias") and m.bias is not None:
            init_bias(m.bias)
    elif classname.find("AdaptiveEmbedding") != -1:
        # Adaptive input embeddings carry one projection matrix per cluster.
        if hasattr(m, "emb_projs"):
            for i in range(len(m.emb_projs)):
                if m.emb_projs[i] is not None:
                    init_embed(m.emb_projs[i])
    elif classname.find("Embedding") != -1:
        if hasattr(m, "weight"):
            init_weight(m.weight)
    elif classname.find("ProjectedAdaptiveLogSoftmax") != -1:
        if hasattr(m, "cluster_weight") and m.cluster_weight is not None:
            init_weight(m.cluster_weight)
        if hasattr(m, "cluster_bias") and m.cluster_bias is not None:
            init_bias(m.cluster_bias)
        if hasattr(m, "out_projs"):
            for i in range(len(m.out_projs)):
                if m.out_projs[i] is not None:
                    init_embed(m.out_projs[i])
    elif classname.find("LayerNorm") != -1:
        # LayerNorm scale is drawn around 1.0 rather than 0.0.
        if hasattr(m, "weight"):
            nn.init.normal_(m.weight, 1.0, cfg.INITIALIZER.base_init[1])
        if hasattr(m, "bias") and m.bias is not None:
            init_bias(m.bias)
    elif classname.find("TransformerLM") != -1:
        # Relative-position embeddings and attention biases on the top-level model.
        if hasattr(m, "r_emb"):
            init_weight(m.r_emb)
        if hasattr(m, "r_w_bias"):
            init_weight(m.r_w_bias)
        if hasattr(m, "r_r_bias"):
            init_weight(m.r_r_bias)
        if hasattr(m, "r_bias"):
            init_bias(m.r_bias)
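
# Usage sketch (assumption; not shown in this excerpt): weights_init relies on
# init_weight/init_bias/init_embed, cfg, and torch.nn (imported as nn) being
# defined earlier in model/train.py. It is typically applied recursively to
# every submodule via nn.Module.apply:
#
#     model = TransformerLM(...)  # hypothetical construction
#     model.apply(weights_init)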