chatlearn/utils/megatron_import_memory_helper.py (30 lines of code) (raw):

# Copyright 2024 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Version compatibility utilities for Megatron memory management of gradients and parameter weights.

Based on how Megatron uses buffers to manage memory, four versions are
distinguished (see `MegatronVersion`).
"""
import os
from enum import Enum, auto
from typing import List

__all__ = ['MegatronVersion', 'get_megatron_version', 'check_megatron_versions']


class MegatronVersion(Enum):
    """
    The Megatron versions that are currently distinguished.
    """
    V1 = auto()  # use `MemoryBuffer` to manage gradients
    V2 = auto()  # use `GradBuffer` to manage gradients
    V3 = auto()  # use `ParamAndGradBuffer` to manage parameter weights and gradients
    V4 = auto()  # for compatibility with temporary version for Qwen-MoE


def get_megatron_version() -> MegatronVersion:
    """
    Detect which Megatron version is in use.

    The checks run in this order, and the first match wins:

    1. `V4` if the environment variable `QWEN_VERSION` equals `qwen_moe_v1`.
    2. `V3` if `ParamAndGradBuffer` can be imported from `megatron.core.distributed`.
    3. `V2` if `GradBuffer` can be imported from `megatron.core.distributed`.
    4. `V1` otherwise (this includes the case where the import fails entirely).

    The environment variable and the imports are re-evaluated on every call.

    Returns:
        The detected `MegatronVersion` member.
    """
    # for compatibility with temporary version for Qwen-MoE
    if os.environ.get("QWEN_VERSION", '') == 'qwen_moe_v1':
        return MegatronVersion.V4

    # The imports below are probes only: success or failure of the import is
    # the signal, the imported names themselves are not used.
    try:
        # pylint: disable-next=import-outside-toplevel, unused-import
        from megatron.core.distributed import ParamAndGradBuffer
        return MegatronVersion.V3
    except ImportError:
        pass

    try:
        # pylint: disable-next=import-outside-toplevel, unused-import
        from megatron.core.distributed import GradBuffer
        return MegatronVersion.V2
    except ImportError:
        pass

    return MegatronVersion.V1


def check_megatron_versions(targets: List[MegatronVersion]) -> None:
    """
    Verify that the detected Megatron version is one of `targets`.

    Args:
        targets: the versions that are acceptable to the caller.

    Raises:
        AssertionError: if the detected version is not in `targets`.
    """
    version = get_megatron_version()
    # Raise explicitly instead of using an `assert` statement so the check is
    # not stripped when Python runs with `-O`; the exception type is unchanged.
    if version not in targets:
        raise AssertionError(f'Different Megatron version {version} from targets: {targets}.')


# Version detected once at import time; it decides whether `BufferType` is re-exported.
_version = get_megatron_version()

# pylint: disable=unused-import

if _version == MegatronVersion.V3:
    from megatron.core.distributed.param_and_grad_buffer import BufferType

    __all__.append('BufferType')

# pylint: enable=unused-import