in timm/optim/_optim_factory.py
def _register_adam_variants(registry: OptimizerRegistry) -> None:
    """Register Adam-based optimizers"""
    adam_optimizers = [
        OptimInfo(
            name='adam',
            opt_class=torch.optim.Adam,
            description='torch.optim.Adam, Adaptive Moment Estimation',
            has_betas=True
        ),
        OptimInfo(
            name='adamw',
            opt_class=torch.optim.AdamW,
            description='torch.optim.AdamW, Adam with decoupled weight decay',
            has_betas=True
        ),
        OptimInfo(
            name='adamwlegacy',
            opt_class=AdamWLegacy,
            description='legacy impl of AdamW that pre-dates inclusion in torch.optim',
            has_betas=True
        ),
        OptimInfo(
            name='adamp',
            opt_class=AdamP,
            description='Adam with built-in projection to unit norm sphere',
            has_betas=True,
            defaults={'wd_ratio': 0.01, 'nesterov': True}
        ),
        OptimInfo(
            name='nadam',
            opt_class=torch.optim.NAdam,
            description='torch.optim.NAdam, Adam with Nesterov momentum',
            has_betas=True
        ),
        OptimInfo(
            name='nadamlegacy',
            opt_class=NAdamLegacy,
            description='legacy impl of NAdam that pre-dates inclusion in torch.optim',
            has_betas=True
        ),
        OptimInfo(
            name='nadamw',
            opt_class=NAdamW,
            description='Adam with Nesterov momentum and decoupled weight decay, mlcommons/algorithmic-efficiency impl',
            has_betas=True
        ),
        OptimInfo(
            name='radam',
            opt_class=torch.optim.RAdam,
            description='torch.optim.RAdam, Rectified Adam with variance adaptation',
            has_betas=True
        ),
        OptimInfo(
            name='radamlegacy',
            opt_class=RAdamLegacy,
            description='legacy impl of RAdam that pre-dates inclusion in torch.optim',
            has_betas=True
        ),
        OptimInfo(
            name='radamw',
            opt_class=torch.optim.RAdam,
            description='torch.optim.RAdam with decoupled weight decay, Rectified Adam with variance adaptation',
            has_betas=True,
            defaults={'decoupled_weight_decay': True}
        ),
        OptimInfo(
            name='adamax',
            opt_class=torch.optim.Adamax,
            description='torch.optim.Adamax, Adam with infinity norm for more stable updates',
            has_betas=True
        ),
        OptimInfo(
            name='adafactor',
            opt_class=Adafactor,
            description='Memory-efficient implementation of Adam with factored gradients',
        ),
        OptimInfo(
            name='adafactorbv',
            opt_class=AdafactorBigVision,
            description='Big Vision variant of Adafactor with factored gradients, half precision momentum',
        ),
        OptimInfo(
            name='adopt',
            opt_class=Adopt,
            description='Modified Adam that can converge with any β2 with the optimal rate',
        ),
        OptimInfo(
            name='adoptw',
            opt_class=Adopt,
            description='Modified AdamW (decoupled decay) that can converge with any β2 with the optimal rate',
            defaults={'decoupled': True}
        ),
    ]
    for opt in adam_optimizers:
        registry.register(opt)
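
Usage sketch (not part of the file above): the names registered here are the strings accepted by timm's optimizer factory, for which create_optimizer_v2 is the public timm.optim entry point. The model and hyperparameter values below are illustrative only, and the comments assume the registered defaults are merged into the constructor kwargs by the factory.

import torch.nn as nn
from timm.optim import create_optimizer_v2

model = nn.Linear(16, 16)  # any nn.Module stands in for a real model here

# 'adamw' resolves to the OptimInfo registered above (torch.optim.AdamW);
# has_betas=True marks that a betas tuple can be forwarded to the constructor.
optimizer = create_optimizer_v2(
    model,
    opt='adamw',
    lr=1e-3,
    weight_decay=0.05,
    betas=(0.9, 0.95),
)

# 'adoptw' resolves to the Adopt class with its registered default
# {'decoupled': True}, i.e. the AdamW-style decoupled weight decay variant.
optimizer = create_optimizer_v2(model, opt='adoptw', lr=1e-3, weight_decay=0.05)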