in csrc/deep_ep.cpp [1195:1196]
return torch::from_blob(buffer.combine_rdma_send_buffer_data_start, {num_experts / num_ranks, num_ranks * num_max_dispatch_tokens_per_rank, hidden},