in src/configs.js [62:255]
/**
 * Produces a flat, normalized view of a model configuration so downstream code
 * can read attention/cache-related settings (layer counts, head counts, hidden
 * sizes) under uniform key names regardless of model family.
 * @param {Object} config The raw model configuration.
 * @returns {Object} The normalized configuration.
 */
function getNormalizedConfig(config) {
    const model_type = config.model_type;

    // Composite vision-encoder-decoder models: which keys to forward depends
    // on what kind of decoder the model wraps, so handle this case up front.
    if (model_type === 'vision-encoder-decoder') {
        // @ts-expect-error TS2339
        const decoderConfig = getNormalizedConfig(config.decoder);
        const result = pick(config, ['model_type', 'is_encoder_decoder']);
        const forwarded = 'num_decoder_layers' in decoderConfig
            // Decoder is part of an encoder-decoder model
            ? ['num_decoder_layers', 'num_decoder_heads', 'decoder_hidden_size',
                'num_encoder_layers', 'num_encoder_heads', 'encoder_hidden_size']
            // Decoder is a decoder-only model
            : ['num_layers', 'num_heads', 'hidden_size'];
        for (const key of forwarded) {
            result[key] = decoderConfig[key];
        }
        return result;
    }

    // Multimodal wrappers: the language-model settings live in a nested
    // sub-config; map model_type -> the property holding that sub-config.
    const SUB_CONFIG_KEYS = {
        llava: 'text_config',
        paligemma: 'text_config',
        gemma3: 'text_config',
        florence2: 'text_config',
        llava_onevision: 'text_config',
        idefics3: 'text_config',
        ultravox: 'text_config',
        smolvlm: 'text_config',
        gemma3n: 'text_config',
        moondream1: 'phi_config',
        musicgen: 'decoder',
        multi_modality: 'language_config',
    };

    let base = {};
    const subConfigKey = SUB_CONFIG_KEYS[model_type];
    if (subConfigKey !== undefined) {
        // Start from the normalized form of the nested sub-config.
        // @ts-expect-error TS2339
        base = getNormalizedConfig(config[subConfigKey]);
    }

    // Rename tables, grouped by model families that share the same raw key
    // names. Each table maps normalized key -> key in the raw config; table
    // key order mirrors the original assignment order.
    const RENAME_GROUPS = [
        // Decoder-only models
        [['gpt2', 'gptj', 'jais', 'codegen', 'gpt_bigcode'], {
            num_heads: 'n_head',
            num_layers: 'n_layer',
            hidden_size: 'n_embd',
        }],
        [['gpt_neox', 'stablelm', 'opt', 'falcon'], {
            num_heads: 'num_attention_heads',
            num_layers: 'num_hidden_layers',
            hidden_size: 'hidden_size',
        }],
        [['llama', 'olmo', 'olmo2', 'mobilellm', 'granite', 'cohere', 'mistral', 'starcoder2',
            'qwen2', 'qwen2_vl', 'phi', 'phi3', 'phi3_v', 'llava_qwen2'], {
            num_heads: 'num_key_value_heads',
            num_layers: 'num_hidden_layers',
            hidden_size: 'hidden_size',
            num_attention_heads: 'num_attention_heads',
        }],
        [['qwen3', 'gemma', 'gemma2', 'gemma3_text', 'gemma3n_text', 'glm', 'helium'], {
            num_heads: 'num_key_value_heads',
            num_layers: 'num_hidden_layers',
            dim_kv: 'head_dim',
        }],
        [['openelm'], {
            num_heads: 'num_kv_heads',
            num_layers: 'num_transformer_layers',
            dim_kv: 'head_dim',
        }],
        [['gpt_neo', 'donut-swin'], {
            num_heads: 'num_heads',
            num_layers: 'num_layers',
            hidden_size: 'hidden_size',
        }],
        [['bloom'], {
            num_heads: 'n_head',
            num_layers: 'n_layer',
            hidden_size: 'hidden_size',
        }],
        [['mpt'], {
            num_heads: 'n_heads',
            num_layers: 'n_layers',
            hidden_size: 'd_model',
        }],
        [['exaone'], {
            num_heads: 'num_key_value_heads',
            num_layers: 'num_layers',
            dim_kv: 'head_dim',
            num_attention_heads: 'num_attention_heads',
        }],
        // Encoder-decoder models
        [['t5', 'mt5', 'longt5'], {
            num_decoder_layers: 'num_decoder_layers',
            num_decoder_heads: 'num_heads',
            decoder_dim_kv: 'd_kv',
            num_encoder_layers: 'num_layers',
            num_encoder_heads: 'num_heads',
            encoder_dim_kv: 'd_kv',
        }],
        [['bart', 'mbart', 'marian', 'whisper', 'lite-whisper', 'm2m_100',
            'blenderbot', 'blenderbot-small', 'florence2_language'], {
            num_decoder_layers: 'decoder_layers',
            num_decoder_heads: 'decoder_attention_heads',
            decoder_hidden_size: 'd_model',
            num_encoder_layers: 'encoder_layers',
            num_encoder_heads: 'encoder_attention_heads',
            encoder_hidden_size: 'd_model',
        }],
        [['speecht5'], {
            num_decoder_layers: 'decoder_layers',
            num_decoder_heads: 'decoder_attention_heads',
            decoder_hidden_size: 'hidden_size',
            num_encoder_layers: 'encoder_layers',
            num_encoder_heads: 'encoder_attention_heads',
            encoder_hidden_size: 'hidden_size',
        }],
        // Decoder-only configs whose cache is exposed under both encoder and
        // decoder key names.
        [['trocr'], {
            num_decoder_layers: 'decoder_layers',
            num_encoder_layers: 'decoder_layers',
            num_decoder_heads: 'decoder_attention_heads',
            num_encoder_heads: 'decoder_attention_heads',
            decoder_hidden_size: 'd_model',
            encoder_hidden_size: 'd_model',
        }],
        [['musicgen_decoder'], {
            num_decoder_layers: 'num_hidden_layers',
            num_encoder_layers: 'num_hidden_layers',
            num_decoder_heads: 'num_attention_heads',
            num_encoder_heads: 'num_attention_heads',
            decoder_hidden_size: 'hidden_size',
            encoder_hidden_size: 'hidden_size',
        }],
        [['moonshine'], {
            num_decoder_layers: 'decoder_num_hidden_layers',
            num_decoder_heads: 'decoder_num_key_value_heads',
            num_encoder_layers: 'encoder_num_hidden_layers',
            num_encoder_heads: 'encoder_num_key_value_heads',
            decoder_hidden_size: 'hidden_size',
            encoder_hidden_size: 'hidden_size',
        }],
    ];

    let renames = {};
    for (const [types, table] of RENAME_GROUPS) {
        if (types.includes(model_type)) {
            renames = table;
            break;
        }
    }

    // NOTE: If `num_attention_heads` is not set, it is assumed to be equal to `num_heads`
    const normalized_config = {
        ...base,
        ...pick(config, ['model_type', 'multi_query', 'is_encoder_decoder']),
    };
    for (const [to, from] of Object.entries(renames)) {
        normalized_config[to] = config[from];
    }
    return normalized_config;
}