in backends/python/server/text_embeddings_server/models/flash_qwen3.py [0:0]
def __init__(self, model_path, weight_map_json, device, dtype, config: Qwen3Config):
    """Load all Qwen3 model weights onto *device* in *dtype*.

    Args:
        model_path: Directory holding the checkpoint shards — presumably a
            str or Path; passed through to ``load_weight`` unchanged.
        weight_map_json: Parsed checkpoint index; its ``"weight_map"`` entry
            maps each tensor name to the shard file that contains it
            (safetensors ``model.safetensors.index.json`` layout — TODO
            confirm against ``load_weight``).
        device: Target device for the loaded tensors.
        dtype: Target dtype for the loaded tensors.
        config: Qwen3 model configuration; ``num_hidden_layers`` drives how
            many decoder layers are built, ``rms_norm_eps`` the final norm.
    """
    # Every loader below consumes the same tensor-name -> shard mapping,
    # so resolve the dict lookup once instead of three times.
    weight_map = weight_map_json["weight_map"]

    self.word_embeddings_weight = load_weight(
        model_path,
        weight_map,
        "embed_tokens.weight",
        dtype,
        device,
    )
    # One decoder layer per hidden layer; layer_idx selects that layer's
    # weights out of the shared weight map.
    self.layers = [
        Qwen3DecoderLayer(
            model_path,
            weight_map,
            device,
            dtype,
            config,
            layer_idx,
        )
        for layer_idx in range(config.num_hidden_layers)
    ]
    self.rotary_emb = Qwen3RotaryEmbedding(config=config, device=device)
    # Final RMSNorm before the embedding head. Plain literal: the original
    # f-string prefix had no placeholders (ruff F541).
    self.norm = Qwen3RMSNorm(
        model_path,
        weight_map,
        "norm.weight",
        device,
        dtype,
        eps=config.rms_norm_eps,
    )