in src/hyperpod_nemo_adapter/conf/config_schemas.py [0:0]
def after_model_validations(self) -> "BaseModelConfig":
    msg_fn = lambda field, val: f"'{field}' is suggested to be a power of 2. Current value is {val}"

    # Warn (without failing validation) when size-related fields are not powers of two.
    if getattr(self, "max_context_width", None) is not None and not is_power_of_two(self.max_context_width):
        _logger.warning(msg_fn("max_context_width", self.max_context_width))
    if getattr(self, "hidden_size", None) is not None and not is_power_of_two(self.hidden_size):
        _logger.warning(msg_fn("hidden_size", self.hidden_size))
    if getattr(self, "num_attention_heads", None) is not None and not is_power_of_two(self.num_attention_heads):
        _logger.warning(msg_fn("num_attention_heads", self.num_attention_heads))
    if getattr(self, "num_key_value_heads", None) is not None and not is_power_of_two(self.num_key_value_heads):
        _logger.warning(msg_fn("num_key_value_heads", self.num_key_value_heads))

    # Fine-tuning needs a base model to load weights from.
    if self.do_finetune and self.hf_model_name_or_path is None:
        raise ValueError("Must provide 'hf_model_name_or_path' or set 'do_finetune' to False")

    # Tensor and expert parallelism are only supported by the SMP model implementations.
    if not smp and (self.tensor_model_parallel_degree > 1 or self.expert_model_parallel_degree > 1):
        raise ValueError(
            "Non-SMP model implementations do not support tensor_model_parallel_degree or expert_model_parallel_degree > 1"
        )

    # activation_loading_horizon only takes effect when activation checkpointing is enabled.
    if not self.activation_checkpointing and self.activation_loading_horizon > 1:
        _logger.warning(
            "Note: activation_loading_horizon will not be activated since activation_checkpointing is disabled"
        )
    return self
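
The validator above relies on an is_power_of_two helper and a module-level _logger that are defined elsewhere in config_schemas.py and are not shown in this excerpt. A minimal sketch of what such a helper typically looks like (an assumption, not necessarily the adapter's actual implementation), using the standard bit trick, together with example values that would and would not trigger the warnings:

def is_power_of_two(n: int) -> bool:
    # Assumed helper: True for positive powers of two (1, 2, 4, 8, ...).
    return n > 0 and (n & (n - 1)) == 0

# Example values for the checks above.
assert is_power_of_two(4096)       # e.g. max_context_width=4096 -> no warning
assert not is_power_of_two(6144)   # e.g. hidden_size=6144 -> warning is logged

Since the method returns self and is named as an "after" validation, it is presumably registered as a post-validation hook on BaseModelConfig (for example, a Pydantic model_validator(mode="after")); the registration itself is not part of this excerpt.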