in deep_ep/buffer.py [0:0]
def clean_low_latency_buffer(self, num_max_dispatch_tokens_per_rank: int, hidden: int, num_experts: int) -> None:
    """
    Clean the buffer so that low-latency kernels can be executed on it.

    The low-latency kernels require part of the buffer to be zero-initialized, so a buffer that has become
    dirty must be cleaned before they run. For example, after running the normal dispatch/combine, call
    this function before executing any low-latency kernel.

    Arguments:
        num_max_dispatch_tokens_per_rank: the maximum number of tokens to dispatch; every rank must hold
            the same value.
        hidden: the hidden dimension of each token.
        num_experts: the total number of experts.
    """
    # This method performs no work of its own: the three sizes are forwarded, positionally and
    # unchanged, to the runtime object's method of the same name.
    clean = self.runtime.clean_low_latency_buffer
    clean(num_max_dispatch_tokens_per_rank, hidden, num_experts)