def after_model_validations()

in src/hyperpod_nemo_adapter/conf/config_schemas.py [0:0]


    def after_model_validations(self) -> "BaseModelConfig":
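        # Advisory checks: warn (without failing validation) when size-related fields are not powers of two.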
        msg_fn = lambda field, val: f"'{field}' is suggested to be a power of 2. Current value is {val}"

        if getattr(self, "max_context_width", None) is not None and not is_power_of_two(self.max_context_width):
            _logger.warning(msg_fn("max_context_width", self.max_context_width))

        if getattr(self, "hidden_size", None) is not None and not is_power_of_two(self.hidden_size):
            _logger.warning(msg_fn("hidden_size", self.hidden_size))

        if getattr(self, "num_attention_heads", None) is not None and not is_power_of_two(self.num_attention_heads):
            _logger.warning(msg_fn("num_attention_heads", self.num_attention_heads))

        if getattr(self, "num_key_value_heads", None) is not None and not (
            self.num_key_value_heads is None or is_power_of_two(self.num_key_value_heads)
        ):
            _logger.warning(msg_fn("num_key_value_heads", self.num_key_value_heads))

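        # Hard checks below: fine-tuning requires a pretrained checkpoint to load from.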
        if self.do_finetune and self.hf_model_name_or_path is None:
            raise ValueError("Must provide 'hf_model_name_or_path' or set 'do_finetune' to False")

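        # Tensor/expert model parallelism beyond degree 1 is only available with the SMP model implementations.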
        if not smp and (self.tensor_model_parallel_degree > 1 or self.expert_model_parallel_degree > 1):
            raise ValueError(
                "Non-SMP model implementations do not support tensor_model_parallel_degree or expert_model_parallel_degree > 1"
            )

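        # activation_loading_horizon only takes effect when activation checkpointing is enabled.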
        if not self.activation_checkpointing and self.activation_loading_horizon > 1:
            _logger.warning(
                "Note: activation_loading_horizon will not be activated since activation_checkpointing is disabled"
            )

        return self
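
The power-of-two checks above only log warnings, while the fine-tuning and parallelism checks raise. The sketch below shows how this pattern behaves when such a validator is wired into a Pydantic model; the ToyModelConfig fields, the model_validator(mode="after") registration, and the is_power_of_two implementation are illustrative assumptions, not taken from this file.

# Minimal sketch (not the repo's actual wiring): assumes the method is registered as a
# Pydantic `model_validator(mode="after")` and that `is_power_of_two` behaves as below.
import logging
from typing import Optional

from pydantic import BaseModel, model_validator

_logger = logging.getLogger(__name__)


def is_power_of_two(n: int) -> bool:
    # Hypothetical stand-in for the repo's helper: True for positive ints with a single set bit.
    return n > 0 and (n & (n - 1)) == 0


class ToyModelConfig(BaseModel):
    hidden_size: int = 4096
    do_finetune: bool = False
    hf_model_name_or_path: Optional[str] = None

    @model_validator(mode="after")
    def after_model_validations(self) -> "ToyModelConfig":
        if not is_power_of_two(self.hidden_size):
            _logger.warning(f"'hidden_size' is suggested to be a power of 2. Current value is {self.hidden_size}")
        if self.do_finetune and self.hf_model_name_or_path is None:
            raise ValueError("Must provide 'hf_model_name_or_path' or set 'do_finetune' to False")
        return self


ToyModelConfig(hidden_size=3000)    # validates, but logs the power-of-two warning
# ToyModelConfig(do_finetune=True)  # would raise pydantic.ValidationError (wrapping the ValueError above)

In this toy setup, a non-power-of-two hidden_size still produces a valid config object, whereas enabling do_finetune without a model path fails validation outright, mirroring the warn-versus-raise split in the method above.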