in chatlearn/utils/vllm_utils.py
def map_src_to_dst(self):
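    """Translate each source parameter name in self.src_names into its
    destination parameter name and collect the results in self._dst_names."""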
    for src_name in self.src_names:
        # Map word embeddings.
        if src_name in self.embedding_sync_map:
            self._dst_names.append(self.get_dst_name(self.embedding_sync_map, src_name))
            continue
        # Map the final layer.
        if src_name in self.final_layer_sync_map:
            self._dst_names.append(self.get_dst_name(self.final_layer_sync_map, src_name))
            continue
        m = self.layer_re.match(src_name)
        # Everything else must be a transformer-layer parameter.
        if m is None:
            raise RuntimeError(f"expected src_name to match a layer pattern, but got {src_name}")
        # The index of the layer.
        layer_idx = int(m.group(1)) + self.layer_offset
        # The name of the operation.
        op_name = m.group(2)
        # Is it a weight or a bias?
        weight_or_bias = m.group(3)
        # The name of the layer.
        layer_name = f"{self.dst_prefix}.{self.layer_prefix}.{layer_idx}"
        # For layernorms, append the destination layernorm name.
        if op_name.endswith("layernorm"):
            if self.qwen_version == QwenVersion.v_1:
                if "attention." in op_name:
                    self._dst_names.append(
                        layer_name + self.get_dst_name(self.layer_sync_map, ".attn.attention_layernorm.") + weight_or_bias)
                if "mlp." in op_name:
                    self._dst_names.append(
                        layer_name + self.get_dst_name(self.layer_sync_map, op_name) + weight_or_bias)
            if op_name.startswith("input"):
                ln_name = "ln_1" if self.qwen_version == QwenVersion.v_1 else "input_layernorm"
                self._dst_names.append(
                    layer_name + "." + ln_name + "." + weight_or_bias)
            elif op_name.startswith("post"):
                ln_name = "ln_2" if self.qwen_version == QwenVersion.v_1 else "post_attention_layernorm"
                self._dst_names.append(
                    layer_name + "." + ln_name + "." + weight_or_bias)
            elif self.qwen_version == QwenVersion.v_2:
                raise RuntimeError(f"unsupported layernorm {op_name}.")
        elif op_name == "self_attention.rotary_emb":
            self._dst_names.append(layer_name + ".attn.rotary_emb.inv_freq")
        # Fused QKV projection: map query_key_value onto the destination attention projection.
        elif op_name in ["attention.query_key_value", "self_attention.query_key_value"]:
            if self.qwen_version == QwenVersion.v_1:
                dst_name = layer_name + f".attn.c_attn.{weight_or_bias}"
            else:
                dst_name = layer_name + f".self_attn.qkv_proj.{weight_or_bias}"
            self._dst_names.append(dst_name)
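        # mlp.w1 and mlp.w2 both map to the single fused gate_up_proj weight, so
        # the destination name is appended only once.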
elif op_name in ["mlp.w1", "mlp.w2"]:
out_name = self.layer_sync_map[op_name]
gate_up_proj_name = layer_name + out_name + "weight"
if gate_up_proj_name not in self._dst_names:
self._dst_names.append(gate_up_proj_name)
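        # Shared experts: dense_h_to_4h likewise maps to a fused gate/up projection weight.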
elif op_name in ["mlp.shared_experts.dense_h_to_4h"]:
out_name = self.layer_sync_map[op_name]
gate_up_proj_name = layer_name + out_name + "weight"
self._dst_names.append(gate_up_proj_name)
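        # Per-expert MoE weights: the destination name is taken from the sync map
        # as-is, without appending weight_or_bias.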
elif "mlp.experts" in op_name:
out_name = self.layer_sync_map[op_name]
self._dst_names.append(layer_name + out_name)
        # Remaining weights and biases: map through the generic layer sync map.
        elif weight_or_bias in ["weight", "bias"]:
            out_name = self.layer_sync_map[op_name]
            self._dst_names.append(layer_name + out_name + weight_or_bias)
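
# Illustrative sketch (not part of the original file; the regex, prefixes, and
# sync-map contents here are assumptions): with dst_prefix="model",
# layer_prefix="layers", layer_offset=0, and a layer_re that captures
# (layer index, op name, weight-or-bias), a source name such as
#     "transformer.layers.0.self_attention.query_key_value.weight"
# would yield the destination name
#     "model.layers.0.self_attn.qkv_proj.weight"   (Qwen v2)
# or  "model.layers.0.attn.c_attn.weight"          (Qwen v1).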