src/accelerate/utils/dataclasses.py
def __post_init__(self):
    prefix = "MEGATRON_LM_"
    # Fall back to MEGATRON_LM_*-prefixed environment variables for any
    # field that was not set explicitly on the dataclass.
    if self.tp_degree is None:
        self.tp_degree = int(os.environ.get(prefix + "TP_DEGREE", 1))
    if self.pp_degree is None:
        self.pp_degree = int(os.environ.get(prefix + "PP_DEGREE", 1))
    if self.num_micro_batches is None:
        self.num_micro_batches = int(os.environ.get(prefix + "NUM_MICRO_BATCHES", 1))
    if self.gradient_clipping is None:
        self.gradient_clipping = float(os.environ.get(prefix + "GRADIENT_CLIPPING", 1.0))
    if self.recompute_activations is None:
        self.recompute_activations = str_to_bool(os.environ.get(prefix + "RECOMPUTE_ACTIVATIONS", "False")) == 1
    if self.use_distributed_optimizer is None:
        self.use_distributed_optimizer = (
            str_to_bool(os.environ.get(prefix + "USE_DISTRIBUTED_OPTIMIZER", "False")) == 1
        )
    if self.sequence_parallelism is None:
        self.sequence_parallelism = str_to_bool(os.environ.get(prefix + "SEQUENCE_PARALLELISM", "False")) == 1

    # Pipeline parallelism and the distributed optimizer require
    # Megatron-LM's local DDP implementation; otherwise use torch DDP.
    if self.pp_degree > 1 or self.use_distributed_optimizer:
        self.DDP_impl = "local"
    else:
        self.DDP_impl = "torch"

    # Normalize consumed_samples to a [train, valid, test] triple,
    # padding any missing entries with 0.
    if self.consumed_samples is not None:
        if len(self.consumed_samples) == 1:
            self.consumed_samples.extend([0, 0])
        elif len(self.consumed_samples) == 2:
            self.consumed_samples.append(0)

    # Base keyword arguments forwarded to Megatron-LM initialization.
    self.megatron_lm_default_args = {
        "tensor_model_parallel_size": self.tp_degree,
        "pipeline_model_parallel_size": self.pp_degree,
        "pipeline_model_parallel_split_rank": self.pipeline_model_parallel_split_rank,
        "num_layers_per_virtual_pipeline_stage": self.num_layers_per_virtual_pipeline_stage,
        "DDP_impl": self.DDP_impl,
        "use_distributed_optimizer": self.use_distributed_optimizer,
        "sequence_parallel": self.sequence_parallelism,
        "clip_grad": self.gradient_clipping,
        "num_micro_batches": self.num_micro_batches,
        "consumed_samples": self.consumed_samples,
        "no_wd_decay_cond": self.no_wd_decay_cond,
        "scale_lr_cond": self.scale_lr_cond,
        "lr_mult": self.lr_mult,
        "megatron_dataset_flag": self.megatron_dataset_flag,
        "eval_iters": self.eval_iters,
        "eval_interval": self.eval_interval,
    }
    # "selective" recomputation checkpoints and recomputes only the core
    # attention blocks rather than full transformer layers.
    if self.recompute_activations:
        self.megatron_lm_default_args["recompute_granularity"] = "selective"
    if self.tensorboard_dir is not None:
        self.megatron_lm_default_args["tensorboard_dir"] = self.tensorboard_dir
    if self.set_all_logging_options:
        self.set_tensorboard_logging_options()
    if self.other_megatron_args is not None:
        self.megatron_lm_default_args.update(self.other_megatron_args)
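
Usage sketch: assuming this is the __post_init__ of accelerate's MegatronLMPlugin dataclass, any field left as None can be supplied through the corresponding MEGATRON_LM_* environment variable instead of a constructor argument. The snippet below is illustrative and not part of the file above; the import path and field names follow the public accelerate API and the excerpt.

import os

# Configure the plugin via environment variables; values are parsed with
# int/float/str_to_bool exactly as in __post_init__ above.
os.environ["MEGATRON_LM_TP_DEGREE"] = "2"
os.environ["MEGATRON_LM_PP_DEGREE"] = "4"
os.environ["MEGATRON_LM_USE_DISTRIBUTED_OPTIMIZER"] = "true"

from accelerate.utils import MegatronLMPlugin

plugin = MegatronLMPlugin()  # unset fields pick up the env values
assert plugin.tp_degree == 2 and plugin.pp_degree == 4
# pp_degree > 1 (or the distributed optimizer) selects Megatron-LM's
# "local" DDP implementation, per the branch in __post_init__ above.
assert plugin.megatron_lm_default_args["DDP_impl"] == "local"

Resolving these fallbacks in __post_init__ keeps the dataclass usable both when constructed directly in Python and when driven by accelerate launch, which exports this kind of configuration through the environment.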