in maga_transformer/models/cogvlm2_weight.py [0:0]
def _get_hf_layer_weight_info(self):
    """Map HF CogVLM2 per-layer checkpoint tensors to internal weight slots.

    Each transformer layer has two expert branches — a language branch and a
    vision branch — for both self-attention and the MLP.  The language-branch
    attention/FFN entries carry ``attn_config`` / ``ffn_config``; the
    vision-branch tensors are registered as plain atomic weights.  Linear
    weights are transposed on load; layernorm gammas and biases are loaded
    as-is.

    Returns:
        list: weight descriptors for one layer (``{i}`` is the layer index
        placeholder expanded by the loader).
    """
    attn_config = self.attn_config
    ffn_config = self.ffn_config

    def _src(ckpt_name):
        # Single-tensor source list, loaded unchanged from the checkpoint.
        return [CkptWeightInfo(ckpt_name, identity)]

    # Language-expert MLP: gate/up/down projections grouped under one FfnWeight.
    language_ffn = FfnWeight(
        sub_weights=[
            FfnAtomicWeight(
                W.ffn_w2,
                _src("model.layers.{i}.mlp.language_mlp.down_proj.weight"),
                transpose,
                config=ffn_config,
            ),
            FfnAtomicWeight(
                W.ffn_w1,
                _src("model.layers.{i}.mlp.language_mlp.gate_proj.weight"),
                transpose,
                config=ffn_config,
            ),
            FfnAtomicWeight(
                W.ffn_w3,
                _src("model.layers.{i}.mlp.language_mlp.up_proj.weight"),
                transpose,
                config=ffn_config,
            ),
        ],
        config=ffn_config,
    )

    return [
        AtomicWeight(
            W.pre_ln_gamma,
            _src("model.layers.{i}.input_layernorm.weight"),
            identity,
        ),
        # Language-expert attention (fused QKV weight, no bias entry here).
        AttnAtomicWeight(
            W.attn_qkv_w,
            _src("model.layers.{i}.self_attn.language_expert_query_key_value.weight"),
            transpose,
            config=attn_config,
        ),
        AttnAtomicWeight(
            W.attn_o_w,
            _src("model.layers.{i}.self_attn.language_expert_dense.weight"),
            transpose,
            config=attn_config,
        ),
        # Vision-expert attention: fused QKV carries a bias in the checkpoint.
        AtomicWeight(
            W.vision_attn_qkv_w,
            _src("model.layers.{i}.self_attn.vision_expert_query_key_value.weight"),
            transpose,
        ),
        AtomicWeight(
            W.vision_attn_qkv_b,
            _src("model.layers.{i}.self_attn.vision_expert_query_key_value.bias"),
            identity,
        ),
        AtomicWeight(
            W.vision_attn_o_w,
            _src("model.layers.{i}.self_attn.vision_expert_dense.weight"),
            transpose,
        ),
        AtomicWeight(
            W.post_ln_gamma,
            _src("model.layers.{i}.post_attention_layernorm.weight"),
            identity,
        ),
        language_ffn,
        # Vision-expert MLP: individual atomic weights, no FFN config attached.
        AtomicWeight(
            W.vision_ffn_w2,
            _src("model.layers.{i}.mlp.vision_mlp.down_proj.weight"),
            transpose,
        ),
        AtomicWeight(
            W.vision_ffn_w1,
            _src("model.layers.{i}.mlp.vision_mlp.gate_proj.weight"),
            transpose,
        ),
        AtomicWeight(
            W.vision_ffn_w3,
            _src("model.layers.{i}.mlp.vision_mlp.up_proj.weight"),
            transpose,
        ),
    ]