in timm/optim/_optim_factory.py [0:0]
def _register_other_optimizers(registry: OptimizerRegistry) -> None:
"""Register miscellaneous optimizers"""
other_optimizers = [
OptimInfo(
name='adabelief',
opt_class=AdaBelief,
description='Adapts learning rate based on gradient prediction error',
has_betas=True,
defaults={'rectify': False}
),
OptimInfo(
name='radabelief',
opt_class=AdaBelief,
description='Rectified AdaBelief with variance adaptation',
has_betas=True,
defaults={'rectify': True}
),
OptimInfo(
name='adadelta',
opt_class=torch.optim.Adadelta,
description='torch.optim.Adadelta, Adapts learning rates based on running windows of gradients'
),
OptimInfo(
name='adagrad',
opt_class=torch.optim.Adagrad,
description='torch.optim.Adagrad, Adapts learning rates using cumulative squared gradients',
defaults={'eps': 1e-8}
),
OptimInfo(
name='adan',
opt_class=Adan,
description='Adaptive Nesterov Momentum Algorithm',
defaults={'no_prox': False},
has_betas=True,
num_betas=3
),
OptimInfo(
name='adanw',
opt_class=Adan,
description='Adaptive Nesterov Momentum with decoupled weight decay',
defaults={'no_prox': True},
has_betas=True,
num_betas=3
),
OptimInfo(
name='adahessian',
opt_class=Adahessian,
description='An Adaptive Second Order Optimizer',
has_betas=True,
second_order=True,
),
OptimInfo(
name='kron',
opt_class=Kron,
description='PSGD optimizer with Kronecker-factored preconditioner',
has_momentum=True,
),
OptimInfo(
name='kronw',
opt_class=Kron,
description='PSGD optimizer with Kronecker-factored preconditioner and decoupled weight decay',
has_momentum=True,
defaults={'decoupled_decay': True}
),
OptimInfo(
name='laprop',
opt_class=LaProp,
description='Separating Momentum and Adaptivity in Adam',
has_betas=True,
),
OptimInfo(
name='lion',
opt_class=Lion,
description='Evolved Sign Momentum optimizer for improved convergence',
has_eps=False,
has_betas=True
),
OptimInfo(
name='madgrad',
opt_class=MADGRAD,
description='Momentum-based Adaptive gradient method',
has_momentum=True
),
OptimInfo(
name='madgradw',
opt_class=MADGRAD,
description='MADGRAD with decoupled weight decay',
has_momentum=True,
defaults={'decoupled_decay': True}
),
OptimInfo(
name='mars',
opt_class=Mars,
description='Unleashing the Power of Variance Reduction for Training Large Models',
has_betas=True,
),
OptimInfo(
name='novograd',
opt_class=NvNovoGrad,
description='Normalized Adam with L2 norm gradient normalization',
has_betas=True
),
OptimInfo(
name='rmsprop',
opt_class=torch.optim.RMSprop,
description='torch.optim.RMSprop, Root Mean Square Propagation',
has_momentum=True,
defaults={'alpha': 0.9}
),
OptimInfo(
name='rmsproptf',
opt_class=RMSpropTF,
description='TensorFlow-style RMSprop implementation, Root Mean Square Propagation',
has_momentum=True,
defaults={'alpha': 0.9}
),
]
    # Register each OptimInfo entry, then mark 'lion' to default to the foreach
    # (multi-tensor) implementation when it is available.
    for opt in other_optimizers:
        registry.register(opt)
    registry.register_foreach_default('lion')
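
# Usage sketch (not part of this factory function): once registered, these names can be
# requested through timm's optimizer factory. `create_optimizer_v2` and `list_optimizers`
# are public entry points in `timm.optim`; the model and hyperparameter values below are
# illustrative only.
#
#   import timm
#   from timm.optim import create_optimizer_v2, list_optimizers
#
#   model = timm.create_model('resnet18')
#   print(list_optimizers())  # includes 'adabelief', 'adan', 'lion', 'madgradw', ...
#   optimizer = create_optimizer_v2(model, opt='adabelief', lr=1e-3, weight_decay=0.05)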