in src/engine/step4/model_dev/utils/model.py [0:0]
# Assumed module-level imports (not shown in this excerpt):
# from torch.optim import AdamW
# from torch.optim.lr_scheduler import CosineAnnealingLR

def configure_optimizers(self):
    """
    Prepare the optimizer and learning-rate schedule: AdamW with a
    cosine-annealing schedule, with weight decay disabled for bias and
    LayerNorm parameters.

    Returns:
        Tuple of optimizer list and scheduler list, as expected by
        PyTorch Lightning's `configure_optimizers` hook.
    """
    model = self.model
    # Standard transformer convention: no weight decay on biases and
    # LayerNorm weights.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            # All parameters whose names match none of the no-decay patterns.
            "params": [
                p
                for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": self.hparams.weight_decay,
        },
        {
            # Biases and LayerNorm weights: weight decay disabled.
            "params": [
                p
                for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=self.hparams.learning_rate,
        eps=self.hparams.adam_epsilon,
    )
    self.opt = optimizer  # keep a handle on the optimizer for later access
    # Anneal the LR from its initial value down to eta_min over T_max
    # scheduler steps (epochs, under Lightning's default epoch-wise
    # stepping). T_max is hardcoded to 100 here.
    scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-5)
    return [optimizer], [scheduler]
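
For comparison, here is a minimal sketch (not the project's code) of the same AdamW plus cosine-annealing pairing returned in PyTorch Lightning's dict form, which makes the scheduler's stepping interval explicit instead of relying on the default epoch-wise behavior. The hparams attribute names are carried over from the method above; the parameter grouping is omitted for brevity.

from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

def configure_optimizers(self):
    # Sketch only: same optimizer/scheduler pairing, returned as
    # Lightning's scheduler-config dict.
    optimizer = AdamW(
        self.model.parameters(),
        lr=self.hparams.learning_rate,
        eps=self.hparams.adam_epsilon,
    )
    scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-5)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": scheduler,  # the LR scheduler instance
            "interval": "epoch",     # step the schedule once per epoch
            "frequency": 1,          # every epoch
        },
    }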