in dualpipe/comm.py [0:0]
def append_isend(ops: List[dist.P2POp], tensors: List[torch.Tensor], dst: int, group: dist.ProcessGroup) -> None:
    """Append an ``isend`` P2P op to *ops* for every non-None tensor.

    Args:
        ops: List that is extended in place with the new ``dist.P2POp`` objects.
        tensors: Tensors to send; entries that are ``None`` are skipped.
        dst: Destination rank, passed to ``get_global_rank`` together with
            *group* to obtain the peer rank used for each op.
        group: Process group used for the rank translation.

    Nothing is sent here; the ops are only constructed and collected.
    NOTE(review): presumably the caller later launches them in a batch
    (e.g. ``dist.batch_isend_irecv``) -- confirm against the call site.
    """
    # Translate once up front; this also runs when there is nothing to send.
    peer_rank = dist.distributed_c10d.get_global_rank(group, dst)
    ops.extend(
        dist.P2POp(dist.isend, payload, peer_rank)
        for payload in tensors
        if payload is not None
    )