in deep_ep/buffer.py [0:0]
def get_low_latency_rdma_size_hint(num_max_dispatch_tokens_per_rank: int, hidden: int, num_ranks: int, num_experts: int) -> int:
    """
    Return the minimum size the RDMA buffer must have. The size is computed assuming BF16.

    Arguments:
        num_max_dispatch_tokens_per_rank: the maximum number of tokens to dispatch; every rank must
            hold the same value.
        hidden: the hidden dimension of each token.
        num_ranks: the number of ranks in the EP group.
        num_experts: the total number of experts.

    Returns:
        size: the recommended RDMA buffer size.
    """
    # The calculation itself is delegated to `deep_ep_cpp`; the arguments are
    # forwarded positionally, in the same order as this function's parameters.
    size_hint = deep_ep_cpp.get_low_latency_rdma_size_hint(
        num_max_dispatch_tokens_per_rank,
        hidden,
        num_ranks,
        num_experts,
    )
    return size_hint