in src/nccl_ofi_rdma.cpp [5561:5627]
/**
 * @brief	Post the network operation that corresponds to the request's type.
 *
 * Dispatches on req->type:
 *  - NCCL_OFI_RDMA_SEND: posts either a single eager send or one RDMA write
 *    per entry of the request's schedule, starting at the first entry that
 *    has not been posted yet (send_data->xferred_rail_id).
 *  - NCCL_OFI_RDMA_WRITE: posts an RMA write and advances the request's
 *    xferred_rail_id on success.
 *  - NCCL_OFI_RDMA_CTRL_RX_BUFF / NCCL_OFI_RDMA_EAGER_RX_BUFF: posts the rx
 *    buffer on the rail recorded in the request.
 *
 * @param	req	Request to make progress on. Must not be NULL.
 *
 * @return	0 if every post attempted by this call succeeded,
 *		-ENOTSUP if a send request has no schedule,
 *		-EINVAL if the request type is not handled here,
 *		otherwise the non-zero value returned by the posting helper.
 */
static int send_progress(nccl_net_ofi_rdma_req_t *req)
{
	ssize_t ret = 0;

	/* Validate the request before anything reads through it */
	assert(req != NULL);

	if (req->type == NCCL_OFI_RDMA_SEND) { // Post RDMA write
		/* The send communicator is only needed to look up rails for sends */
		nccl_net_ofi_rdma_send_comm_t *s_comm = (nccl_net_ofi_rdma_send_comm_t *)req->comm;
		rdma_req_send_data_t *send_data = get_send_data(req);

		// Get Schedule
		nccl_net_ofi_schedule_t *schedule = send_data->schedule;
		if (OFI_UNLIKELY(schedule == NULL)) {
			NCCL_OFI_WARN("Schedule for req %p is NULL", req);
			return -ENOTSUP;
		}

		/* An eager send is expected to carry exactly one xfer */
		assert(!(send_data->eager) || schedule->num_xfer_infos == 1);

		nccl_net_ofi_xfer_info_t *xfers = schedule->rail_xfer_infos;

		if (send_data->eager) {
			/* Get xfer information from the schedule */
			nccl_net_ofi_xfer_info_t *xfer_info = &xfers[0];

			/* Get communicator rail information to xfer the req */
			nccl_net_ofi_rdma_send_comm_rail_t *comm_rail =
				rdma_send_comm_get_rail(s_comm, xfer_info->rail_id);

			ret = post_rdma_eager_send(req, comm_rail, xfer_info);
		} else {
			/*
			 * Resume from the first xfer that has not been posted yet,
			 * so a call following a failed post does not re-post the
			 * xfers that already went out.
			 */
			for (uint16_t rail_it = send_data->xferred_rail_id;
			     rail_it < schedule->num_xfer_infos; rail_it++) {
				/* Get xfer information from the schedule */
				nccl_net_ofi_xfer_info_t *xfer_info = &xfers[rail_it];

				/* Get communicator rail information to xfer the req */
				nccl_net_ofi_rdma_send_comm_rail_t *comm_rail =
					rdma_send_comm_get_rail(s_comm, xfer_info->rail_id);

				ret = post_rdma_write(req, comm_rail, xfer_info,
						      send_data->no_target_completion);
				if (ret != 0) {
					/* Stop at the first failed post and report it */
					break;
				}

				/* Successfully sent the xfer with this rail */
				send_data->xferred_rail_id++;
			}
		}
	} else if (req->type == NCCL_OFI_RDMA_WRITE) { // Post RMA write
		ret = post_rma_write(req);
		if (ret == 0) {
			rdma_req_rma_op_data_t *rma_op_data =
				req_get_rma_op_data(req, NCCL_OFI_RDMA_WRITE);

			/* Successfully sent the xfer with this rail */
			rma_op_data->xferred_rail_id++;
		}
	} else if (req->type == NCCL_OFI_RDMA_CTRL_RX_BUFF ||
		   req->type == NCCL_OFI_RDMA_EAGER_RX_BUFF) { // Post rx Buffer
		rdma_req_rx_buff_data_t *rx_buff_data = get_rx_buff_data(req);

		/* Get ep rail information to xfer the req */
		assert(rx_buff_data->rail != NULL);

		ret = post_rx_buffer(req, rx_buff_data->rail, false);
	} else {
		NCCL_OFI_WARN("Unexpected request type. Request type: %d", req->type);
		ret = -EINVAL;
	}

	/* Posting helpers report status as ssize_t; this function returns int */
	return (int)ret;
}