in grok/training.py
def configure_optimizers(self) -> Tuple[List[Any], List[Dict]]:
    """
    Used by PyTorch Lightning to set up optimization.

    :returns: a list of optimizers and a list of scheduler configs.
    """
    # lr=1 is intentional: the LambdaLR scheduler below scales this base
    # rate, so the lambda's return value is the effective learning rate.
    optimizer = CustomAdamW(
        self.parameters(),
        betas=(0.9, 0.98),
        eps=1e-8,
        lr=1,
        weight_decay=self.hparams.weight_decay,
        noise_factor=self.hparams.noise_factor,
        weight_decay_form=self.hparams.weight_decay_kind,
    )
    # Alternative: sharpness-aware minimization (SAM) wrapping CustomAdamW.
    # optimizer = SAM(
    #     self.parameters(),
    #     base_optimizer=CustomAdamW,
    #     rho=0.05,
    #     betas=(0.9, 0.98),
    #     eps=1e-8,
    #     lr=1,
    #     weight_decay=self.hparams.weight_decay,
    #     noise_factor=self.hparams.noise_factor,
    # )
    schedulers = [
        {
            "scheduler": LambdaLR(optimizer, lr_lambda=self._scheduler_lr),
            # Step the scheduler on every optimizer step, not every epoch.
            "interval": "step",
            "frequency": 1,
        }
    ]
    return [optimizer], schedulers
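
Because the optimizer's base lr is 1, `_scheduler_lr` must return the actual learning rate for a given step (LambdaLR multiplies the base lr by the lambda's output). A minimal sketch of such a lambda, assuming a linear-warmup-then-constant schedule; the hyperparameter names `max_lr` and `warmup_steps` are hypothetical and not necessarily those used in this repo:

def _scheduler_lr(self, step: int) -> float:
    # Hypothetical sketch: linear warmup to max_lr over warmup_steps,
    # then constant. Since the optimizer was created with lr=1, the
    # value returned here is the effective learning rate.
    max_lr = self.hparams.max_lr              # assumed hyperparameter
    warmup_steps = self.hparams.warmup_steps  # assumed hyperparameter
    if step < warmup_steps:
        return max_lr * (step + 1) / warmup_steps
    return max_lr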