in grok/transformer.py [0:0]
def forward(self, input: Tensor) -> Tensor:
    """Apply the linear map, perturbing the parameters with noise while training.

    When ``self.weight_noise`` is positive and the module is in training
    mode, fresh standard-normal samples scaled by ``self.weight_noise`` are
    added to the bias (if one exists) and to the weight for this call only;
    the stored parameters themselves are not modified. Otherwise the stored
    parameters are used unchanged.

    Args:
        input: Tensor handed to ``F.linear`` as its first argument.

    Returns:
        The result of ``F.linear(input, weight, bias)`` using the (possibly
        perturbed) weight and bias.
    """
    # Fast path: no noise requested, or not training -> plain linear layer.
    if not (self.weight_noise > 0 and self.training):
        return F.linear(input, self.weight, self.bias)

    scale = self.weight_noise

    # The bias is perturbed before the weight so that the random stream is
    # consumed in a fixed order; a missing bias stays None.
    noisy_bias = self.bias
    if noisy_bias is not None:
        noisy_bias = noisy_bias + torch.randn_like(noisy_bias) * scale

    # Additive noise. A multiplicative variant,
    # weight * exp(randn * weight_noise), is deliberately not used here.
    noisy_weight = self.weight + torch.randn_like(self.weight) * scale

    return F.linear(input, noisy_weight, noisy_bias)