in optimum/neuron/accelerate/scheduler.py [0:0]
def step(self, *args, **kwargs):
    if not self.step_with_optimizer:
        # No link between scheduler and optimizer -> just step
        self.scheduler.step(*args, **kwargs)
        return

    # Otherwise, first make sure the optimizer was stepped.
    if not self.gradient_state.sync_gradients:
        if self.gradient_state.adjust_scheduler:
            # Gradient accumulation step: advance the counter as if we had stepped.
            self.scheduler._step_count += 1
        return

    for opt in self.optimizers:
        if opt.step_was_skipped:
            # The optimizer step was skipped (e.g. gradient overflow), so do not advance the scheduler.
            return

    if self.split_batches:
        # Split batches -> the training dataloader batch size is not changed, so one step per training step
        self.scheduler.step(*args, **kwargs)
    else:
        # Otherwise the training dataloader batch size was multiplied by `num_processes`, so we need to do
        # `num_processes` steps per training step.
        num_processes = NeuronAcceleratorState().num_processes
        for _ in range(num_processes):
            # Special case when using OneCycle and `drop_last` was not used: do not step past `total_steps`
            if hasattr(self.scheduler, "total_steps"):
                if self.scheduler._step_count <= self.scheduler.total_steps:
                    self.scheduler.step(*args, **kwargs)
            else:
                self.scheduler.step(*args, **kwargs)
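
For illustration, the following is a minimal standalone sketch (not part of optimum-neuron) of the behavior the else branch implements, reproduced with a plain PyTorch OneCycleLR. The values num_processes, optimizer_steps, and total_steps are made up for the example, and only torch is assumed to be installed. Stepping the scheduler num_processes times per optimizer step lets the schedule finish at the same point as single-process training, while the total_steps guard avoids the ValueError OneCycleLR raises when stepped past its configured length.

# Standalone sketch of the "num_processes steps per training step" branch above.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR

num_processes = 2       # world size assumed by the scheduler wrapper (illustrative)
optimizer_steps = 10    # optimizer steps actually taken on this worker (illustrative)
total_steps = num_processes * optimizer_steps  # schedule length single-process training would see

model = torch.nn.Linear(4, 4)
optimizer = SGD(model.parameters(), lr=0.1)
scheduler = OneCycleLR(optimizer, max_lr=0.1, total_steps=total_steps)

for _ in range(optimizer_steps):
    optimizer.step()
    # Mirror the else branch: advance the scheduler once per process so the schedule
    # completes even though this worker only took `optimizer_steps` optimizer steps.
    for _ in range(num_processes):
        # OneCycleLR raises if stepped more than `total_steps` times, hence the guard.
        if scheduler._step_count <= scheduler.total_steps:
            scheduler.step()

print("final lr:", scheduler.get_last_lr())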