in grok/transformer.py [0:0]
def forward(self, input: Tensor) -> Tensor:
    """Look up embeddings for ``input``, optionally through a noisy table.

    When ``self.weight_noise`` is positive and the module is in training
    mode, a fresh sample of standard-normal noise scaled by
    ``self.weight_noise`` is added to a copy of the embedding table for
    this call only; ``self.weight`` itself is not reassigned. Otherwise
    the stored table is used unchanged.

    Args:
        input: Indices to look up, forwarded as-is to ``F.embedding``.

    Returns:
        The result of ``F.embedding`` on the (possibly perturbed) table,
        using this module's ``padding_idx``, ``max_norm``, ``norm_type``,
        ``scale_grad_by_freq`` and ``sparse`` settings.
    """
    table = self.weight
    if self.weight_noise > 0 and self.training:
        # Additive noise, redrawn on every training forward pass.
        # NOTE: a multiplicative variant is kept here as a disabled
        # alternative:
        #   self.weight * torch.exp(torch.randn_like(self.weight) * self.weight_noise)
        perturbation = torch.randn_like(table) * self.weight_noise
        table = table + perturbation
    return F.embedding(
        input,
        table,
        padding_idx=self.padding_idx,
        max_norm=self.max_norm,
        norm_type=self.norm_type,
        scale_grad_by_freq=self.scale_grad_by_freq,
        sparse=self.sparse,
    )