in src/hyperpod_nemo_adapter/collections/model/nlp/sagemaker_qwen_model.py [0:0]
def get_model_config(self):
    """
    Get model config for Qwen.

    Two paths are supported, selected by the ``hf_model_name_or_path`` entry
    of ``self._cfg``:

    * When it is set (not ``None``), the config is obtained via
      ``get_hf_config_from_name_or_path`` and must be a ``Qwen2Config``;
      the values from ``self._get_model_configurable_dict()`` are then
      applied on top of it.
    * Otherwise a fresh ``Qwen2Config`` is built from the configurable
      values plus a fixed set of defaults defined below.

    Returns:
        The resulting ``Qwen2Config`` instance.

    Raises:
        AssertionError: if ``hf_model_name_or_path`` resolves to a config
            that is not a ``Qwen2Config``.
    """
    configurable_dict = self._get_model_configurable_dict()
    if self._cfg.get("hf_model_name_or_path", None) is not None:
        model_config = get_hf_config_from_name_or_path(self._cfg)
        # Guard against pointing a Qwen recipe at a checkpoint/config of a
        # different architecture; the message names the offending type.
        assert isinstance(
            model_config, Qwen2Config
        ), f"model_type is set to qwen but hf_model_name_or_path is not the same model, getting {type(model_config)}"
        # Update the config based on user's input
        model_config.update(configurable_dict)
    else:
        # No pretrained reference given: build the config from scratch using
        # the user-configurable values and the fixed defaults listed here.
        model_config = Qwen2Config(
            **configurable_dict,
            hidden_act="silu",
            use_cache=False,
            pad_token_id=None,
            bos_token_id=1,
            eos_token_id=2,
            tie_word_embeddings=False,
            attention_dropout=0.0,
        )
    return model_config