in optimum/exporters/openvino/model_patcher.py [0:0]
def deepseek_moe_infer(self, x, topk_ids, topk_weight):
    """Dispatch tokens to MoE experts and combine the weighted outputs.

    Patched variant of DeepSeek's ``moe_infer``: every expert is invoked even
    when it receives zero tokens, and ``torch.cat`` is applied unconditionally
    — presumably to keep the traced graph free of data-dependent control flow
    for export (NOTE(review): confirm against the original HF implementation).

    Args:
        x: 2-D tensor of token hidden states, one row per token.
        topk_ids: integer tensor ``(num_tokens, top_k)`` of selected expert ids.
        topk_weight: tensor ``(num_tokens, top_k)`` of per-expert routing weights.

    Returns:
        Tensor ``(num_tokens, hidden)``: the routing-weighted sum of expert
        outputs for each token, cast back to the expert-output dtype.
    """
    num_tokens, top_k = topk_ids.shape

    # One-hot routing matrix -> number of token copies assigned to each expert.
    one_hot = torch.zeros((num_tokens, len(self.experts)))
    one_hot.scatter_(1, topk_ids, 1)
    per_expert_counts = one_hot.sum(dim=0).to(torch.long)

    # Sorting the flattened expert ids groups all slots routed to the same
    # expert into one contiguous run; dividing by top_k recovers the token row.
    order = torch.argsort(topk_ids.view(-1))
    grouped_tokens = x[order // top_k]

    expert_outputs = []
    offset = torch.tensor(0, dtype=torch.long)
    for local_idx, count in enumerate(per_expert_counts):
        # difference with original: experts with an empty slice are NOT skipped
        chunk_end = offset + count
        expert = self.experts[local_idx + self.ep_rank * self.experts_per_rank]
        expert_outputs.append(expert(grouped_tokens[offset:chunk_end]))
        offset = chunk_end

    # difference with original: no torch.new_empty fallback when outputs is empty
    stacked = torch.cat(expert_outputs, dim=0)

    # Invert the argsort grouping so rows line up with the flattened top-k slots.
    unsorted = torch.zeros_like(stacked)
    unsorted[order] = stacked

    combined = (
        unsorted.view(num_tokens, top_k, -1)
        .to(topk_weight.dtype)
        .mul_(topk_weight.unsqueeze(dim=-1))
        .sum(dim=1)
        .to(unsorted.dtype)
    )
    return combined