optimum/neuron/accelerate/scheduler.py
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Custom AcceleratedScheduler for Neuron."""
from accelerate.scheduler import AcceleratedScheduler
from .state import NeuronAcceleratorState
class NeuronAcceleratedScheduler(AcceleratedScheduler):
    def step(self, *args, **kwargs):
        if not self.step_with_optimizer:
            # No link between scheduler and optimizer -> just step.
            self.scheduler.step(*args, **kwargs)
            return

        # Otherwise, first make sure the optimizer was stepped.
        if not self.gradient_state.sync_gradients:
            if self.gradient_state.adjust_scheduler:
                self.scheduler._step_count += 1
            return

        for opt in self.optimizers:
            if opt.step_was_skipped:
                return
        if self.split_batches:
            # Split batches -> the training dataloader batch size is not changed, so one scheduler step per training step.
            self.scheduler.step(*args, **kwargs)
        else:
            # Otherwise the training dataloader batch size was multiplied by `num_processes`, so we need to do
            # `num_processes` scheduler steps per training step.
            num_processes = NeuronAcceleratorState().num_processes
            for _ in range(num_processes):
                # Special case when using OneCycle and `drop_last` was not used.
                if hasattr(self.scheduler, "total_steps"):
                    if self.scheduler._step_count <= self.scheduler.total_steps:
                        self.scheduler.step(*args, **kwargs)
                else:
                    self.scheduler.step(*args, **kwargs)
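

# A minimal usage sketch (an illustration, not part of this module's API): this wrapper is
# normally created for you when a learning-rate scheduler is passed to the Neuron `Accelerator`'s
# `prepare`, but the wrapping can be written out by hand roughly as below, assuming the
# Accelerate/Neuron state has already been initialized so that
# `NeuronAcceleratorState().num_processes` is meaningful:
#
#     import torch
#     from accelerate.optimizer import AcceleratedOptimizer
#
#     model = torch.nn.Linear(8, 2)
#     optimizer = AcceleratedOptimizer(torch.optim.SGD(model.parameters(), lr=0.1))
#     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer.optimizer, step_size=10)
#     scheduler = NeuronAcceleratedScheduler(
#         lr_scheduler,
#         optimizers=[optimizer],
#         step_with_optimizer=True,
#         split_batches=False,
#     )
#     scheduler.step()  # with `split_batches=False`, advances the scheduler `num_processes` times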