in higher/optim.py
def _update(self, grouped_grads: _GroupedGradsType, **kwargs) -> None:
    zipped = zip(self.param_groups, grouped_grads)
    for group_idx, (group, grads) in enumerate(zipped):
        rho, eps = group['rho'], group['eps']
        for p_idx, (p, g) in enumerate(zip(group['params'], grads)):
            if g is None:
                continue

            if g.data.is_sparse:
                raise RuntimeError(
                    'Adadelta does not support sparse gradients'
                )

            state = self.state[group_idx][p_idx]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['square_avg'] = _torch.zeros_like(p.data)
                state['acc_delta'] = _torch.zeros_like(p.data)

            square_avg, acc_delta = state['square_avg'], state['acc_delta']
            state['step'] += 1

            if group['weight_decay'] != 0:
                # Fold the L2 penalty into the gradient: g <- g + weight_decay * p
                g = _add(g, group['weight_decay'], p)

            # Running average of squared gradients:
            # E[g^2] <- rho * E[g^2] + (1 - rho) * g * g
            square_avg = _addcmul(square_avg.mul(rho), 1 - rho, g, g)
            state['square_avg'] = square_avg

            # Adadelta step: delta = sqrt(E[dx^2] + eps) / sqrt(E[g^2] + eps) * g
            std = _add(square_avg, eps).sqrt()
            delta = _add(acc_delta, eps).sqrt().div(std).mul(g)

            # Running average of squared updates:
            # E[dx^2] <- rho * E[dx^2] + (1 - rho) * delta * delta
            state['acc_delta'] = _addcmul(
                acc_delta.mul(rho), 1 - rho, delta, delta
            )

            # Out-of-place update keeps the parameter on the autograd graph
            group['params'][p_idx] = _add(p, -group['lr'], delta)
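
A minimal sketch of how this differentiable update is usually driven, assuming higher's public innerloop_ctx API and a toy torch.nn.Linear model (the shapes and losses are illustrative only):

import torch
import higher

model = torch.nn.Linear(4, 1)
opt = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9)

x = torch.randn(8, 4)
with higher.innerloop_ctx(model, opt) as (fmodel, diffopt):
    inner_loss = fmodel(x).pow(2).mean()
    # diffopt.step produces the new parameters via the _update above,
    # so the Adadelta step itself stays on the autograd graph
    diffopt.step(inner_loss)
    outer_loss = fmodel(x).abs().mean()
    outer_loss.backward()  # backpropagates through the unrolled Adadelta step

Because opt is a torch.optim.Adadelta instance, higher dispatches to its differentiable counterpart, and diffopt.step ends up in this _update method.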