in src/sgd.py [0:0]
def step(self):
    """Apply one SGD update to every parameter that has a gradient.

    For each parameter ``p`` in each entry of ``self.param_groups`` the
    learning-rate-scaled update is::

        d_p = -lr * (p.grad + weight_decay * p)

    When ``momentum != 0`` a per-parameter ``momentum_buffer`` stored in
    ``self.state[p]`` accumulates these already-scaled updates: the first
    step seeds it with a copy of ``d_p``, later steps compute
    ``buf = momentum * buf + d_p``.  The applied update is then ``buf``,
    or ``d_p + momentum * buf`` when the group's ``nesterov`` flag is set.
    The update is added to ``p`` in place.

    Parameters whose ``grad`` is ``None`` are skipped.  ``p.grad`` itself
    is never modified, so gradients remain valid after the step.

    Returns:
        None.
    """
    # Local import: this block cannot see the module-level imports.
    import torch

    # In-place updates of leaf parameters that require grad are rejected
    # while autograd is recording; disabling it here is a no-op if the
    # caller already did so.
    with torch.no_grad():
        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            nesterov = group['nesterov']
            lr = group['lr']

            for p in group['params']:
                grad = p.grad
                if grad is None:
                    continue

                # Build the scaled update in a fresh tensor.  Scaling
                # p.grad in place (as happens when d_p aliases p.grad)
                # would leave the stored gradient multiplied by -lr.
                if weight_decay != 0:
                    # .add() allocates, so the in-place scale is safe.
                    d_p = grad.add(p, alpha=weight_decay).mul_(-lr)
                else:
                    d_p = grad.mul(-lr)

                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        # First step: seed the buffer with this update.
                        buf = param_state['momentum_buffer'] = d_p.clone().detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p)

                    if nesterov:
                        # Out-of-place so the stored buffer is untouched.
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.add_(d_p)