def configure_optimizers()

in run_ranking.py [0:0]
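
The method assumes the following imports at the top of run_ranking.py; the exact sources are an inference from the names used below (AdamW and the warmup schedulers are available from Hugging Face transformers, and AdamW alternatively from torch.optim):

    from transformers import (
        AdamW,
        get_linear_schedule_with_warmup,
        get_cosine_schedule_with_warmup,
        get_cosine_with_hard_restarts_schedule_with_warmup,
    )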


    def configure_optimizers(self):

        # Prepare the optimizer and the learning-rate schedule (warmup followed by decay).
        # Bias and LayerNorm weights are excluded from weight decay, as is standard for transformer fine-tuning.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.hparams.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)
        scheduler = {
            'scheduler': None,      # filled in below according to hparams.lr_schedule
            'monitor': 'val_loss',  # only used by metric-driven schedulers (e.g. ReduceLROnPlateau)
            'interval': 'step',     # step the scheduler every optimizer step rather than every epoch
            'frequency': 1
        }

        if self.hparams.lr_schedule == 'linear':
            scheduler['scheduler'] = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.t_total
            )
        elif self.hparams.lr_schedule == 'cosine':
            scheduler['scheduler'] = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.t_total
            )
        elif self.hparams.lr_schedule == 'cosine_hard':
            scheduler['scheduler'] = get_cosine_with_hard_restarts_schedule_with_warmup(
                optimizer, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.t_total, num_cycles=2.0
            )
        else:
            # No recognised schedule: train with a constant learning rate.
            return optimizer

        return [optimizer], [scheduler]
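
For context, here is a minimal sketch of the hyperparameters and attributes this method expects to find on the module. The names follow the attributes referenced above; the concrete values and the Namespace wrapper are illustrative assumptions, not code from run_ranking.py.

    # Assumed setup (illustrative, not from run_ranking.py): the hparams that
    # configure_optimizers() reads, plus self.t_total used as num_training_steps.
    from argparse import Namespace

    hparams = Namespace(
        learning_rate=2e-5,    # lr passed to AdamW
        weight_decay=0.01,     # applied only to non-bias / non-LayerNorm parameters
        adam_epsilon=1e-8,     # eps passed to AdamW
        warmup_steps=1000,     # length of the linear warmup phase for every schedule
        lr_schedule="linear",  # 'linear' | 'cosine' | 'cosine_hard'; anything else keeps a constant LR
    )
    # self.t_total must also be set elsewhere on the module (e.g. once the dataloader
    # length is known) to the total number of optimizer steps, since all three
    # schedules decay over t_total steps.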