static inline int rdma_req_handle_cq_entry()

in src/nccl_ofi_rdma.cpp [1562:1698]


/**
 * Dispatch one completion-queue entry to the handler that matches its
 * completion flags and the type of the request recovered from the context.
 *
 * The flags are tested in a fixed order (FI_SEND, FI_RECV, FI_WRITE,
 * FI_READ); the first one that is set selects the branch:
 *
 *  - FI_SEND:  connect / connect-response send, control message send,
 *              eager message send, or close message send
 *  - FI_RECV:  forwarded to handle_rx_buff_recv(), together with whether
 *              FI_REMOTE_CQ_DATA is set on the entry
 *  - FI_WRITE: locally initiated write belonging to a send request or to
 *              an RMA write request
 *  - FI_READ:  flush, eager copy, or RMA read
 *
 * @param ctx            context handed to rdma_context_get_req() to look
 *                       up the owning request
 * @param cq_entry_base  completion entry; it is reinterpreted as a
 *                       fi_cq_data_entry to read its flags
 *                       (NOTE(review): presumably the CQ always delivers
 *                       data-format entries -- confirm against CQ setup)
 * @param rail_id        rail the completion arrived on; used for the
 *                       request lookup, tracing, and the receive handler
 *
 * @return the value returned by the selected handler, or -EINVAL when the
 *         request lookup yields NULL, when the request type is not valid
 *         for the completion kind, or when none of the four flags is set
 */
static inline int rdma_req_handle_cq_entry(nccl_net_ofi_context_t *ctx,
					   struct fi_cq_entry *cq_entry_base,
					   uint16_t rail_id)
{
	auto *data_entry = reinterpret_cast<fi_cq_data_entry *>(cq_entry_base);
	const uint64_t flags = data_entry->flags;

	/* For every operation handled here the context resolves to a request. */
	nccl_net_ofi_rdma_req_t *req = rdma_context_get_req(ctx, rail_id);
	if (OFI_UNLIKELY(req == NULL)) {
		NCCL_OFI_WARN("Completion with unexpected NULL op_context");
		return -EINVAL;
	}

	/* ---- Send completions ------------------------------------------ */
	if (flags & FI_SEND) {
		switch (req->type) {
		case NCCL_OFI_RDMA_SEND_CONN:
		case NCCL_OFI_RDMA_SEND_CONN_RESP:
			/* Connect or connect-response message has been sent */
			return inc_req_completion(req, sizeof(nccl_ofi_rdma_connection_info_t), 1);

		case NCCL_OFI_RDMA_SEND_CTRL: {
			/* Control message has been sent */
			NCCL_OFI_TRACE_SEND_CTRL_END(req->dev_id, rail_id, req->comm, req, req->msg_seq_num);
			return set_send_ctrl_completed(req);
		}

		case NCCL_OFI_RDMA_SEND: {
			/*
			 * A send request completing with FI_SEND is expected to
			 * be an eager message; the assert below checks that.
			 */
			NCCL_OFI_TRACE_EAGER_SEND_COMPLETE(req->dev_id, rail_id, req->comm, req->msg_seq_num, req);
			rdma_req_send_data_t *send_data = get_send_data(req);
			assert(send_data->eager);
			return inc_req_completion(req, 0, send_data->total_num_compls);
		}

		case NCCL_OFI_RDMA_SEND_CLOSE:
			/* Close message has been sent */
			return inc_req_completion(req, sizeof(nccl_net_ofi_rdma_close_msg_t), 1);

		/* No other request type may produce a send completion. */
		case NCCL_OFI_RDMA_WRITE:
		case NCCL_OFI_RDMA_READ:
		case NCCL_OFI_RDMA_RECV:
		case NCCL_OFI_RDMA_RECV_SEGMS:
		case NCCL_OFI_RDMA_EAGER_COPY:
		case NCCL_OFI_RDMA_CTRL_RX_BUFF:
		case NCCL_OFI_RDMA_EAGER_RX_BUFF:
		case NCCL_OFI_RDMA_FLUSH:
		case NCCL_OFI_RDMA_RECV_CONN:
		case NCCL_OFI_RDMA_RECV_CONN_RESP:
		case NCCL_OFI_RDMA_INVALID_TYPE:
		default:
			NCCL_OFI_WARN("Send completion from unexpected request type");
			return -EINVAL;
		}
	}

	/* ---- Receive completions --------------------------------------- */
	if (flags & FI_RECV) {
		/* The device is reached through the endpoint stored in the rx buffer data. */
		nccl_net_ofi_rdma_device_t *device =
			rdma_endpoint_get_device(get_rx_buff_data(req)->ep);

		return handle_rx_buff_recv(device, rail_id, data_entry, req,
					   flags & FI_REMOTE_CQ_DATA);
	}

	/* ---- Locally initiated write completions ----------------------- */
	if (flags & FI_WRITE) {
		switch (req->type) {
		case NCCL_OFI_RDMA_SEND: {
			/* One write segment of a send operation finished */
			NCCL_OFI_TRACE_SEND_WRITE_SEG_COMPLETE(req->dev_id, rail_id, req->comm, req->msg_seq_num,
								req);

			rdma_req_send_data_t *write_send = get_send_data(req);
			return inc_req_completion(req, 0, write_send->total_num_compls);
		}

		case NCCL_OFI_RDMA_WRITE: {
			/* RMA write finished */
			rdma_req_rma_op_data_t *write_op = req_get_rma_op_data(req, NCCL_OFI_RDMA_WRITE);
			return inc_req_completion(req, 0, write_op->total_num_compls);
		}

		/* No other request type may produce a write completion. */
		case NCCL_OFI_RDMA_READ:
		case NCCL_OFI_RDMA_RECV:
		case NCCL_OFI_RDMA_SEND_CTRL:
		case NCCL_OFI_RDMA_SEND_CLOSE:
		case NCCL_OFI_RDMA_RECV_SEGMS:
		case NCCL_OFI_RDMA_EAGER_COPY:
		case NCCL_OFI_RDMA_CTRL_RX_BUFF:
		case NCCL_OFI_RDMA_EAGER_RX_BUFF:
		case NCCL_OFI_RDMA_FLUSH:
		case NCCL_OFI_RDMA_SEND_CONN:
		case NCCL_OFI_RDMA_RECV_CONN:
		case NCCL_OFI_RDMA_RECV_CONN_RESP:
		case NCCL_OFI_RDMA_SEND_CONN_RESP:
		case NCCL_OFI_RDMA_INVALID_TYPE:
		default:
			NCCL_OFI_WARN("Write complete from unexpected request type!");
			return -EINVAL;
		}
	}

	/* ---- Read completions ------------------------------------------ */
	if (flags & FI_READ) {
		switch (req->type) {
		case NCCL_OFI_RDMA_FLUSH:
			/* The fi_read issued for a flush finished */
			return handle_flush_comp(req);

		case NCCL_OFI_RDMA_EAGER_COPY:
			return set_eager_copy_completed(req);

		case NCCL_OFI_RDMA_READ: {
			/* RMA read finished */
			rdma_req_rma_op_data_t *read_op = req_get_rma_op_data(req, NCCL_OFI_RDMA_READ);
			return inc_req_completion(req, 0, read_op->total_num_compls);
		}

		/* No other request type may produce a read completion. */
		case NCCL_OFI_RDMA_SEND:
		case NCCL_OFI_RDMA_WRITE:
		case NCCL_OFI_RDMA_RECV:
		case NCCL_OFI_RDMA_SEND_CTRL:
		case NCCL_OFI_RDMA_SEND_CLOSE:
		case NCCL_OFI_RDMA_RECV_SEGMS:
		case NCCL_OFI_RDMA_CTRL_RX_BUFF:
		case NCCL_OFI_RDMA_EAGER_RX_BUFF:
		case NCCL_OFI_RDMA_SEND_CONN:
		case NCCL_OFI_RDMA_RECV_CONN:
		case NCCL_OFI_RDMA_RECV_CONN_RESP:
		case NCCL_OFI_RDMA_SEND_CONN_RESP:
		case NCCL_OFI_RDMA_INVALID_TYPE:
		default:
			NCCL_OFI_WARN("Read complete from unexpected request type!");
			return -EINVAL;
		}
	}

	/* None of the four handled completion flags is set on this entry. */
	NCCL_OFI_WARN("Unexpected comp_flags on cq event 0x%016" PRIX64, flags);
	return -EINVAL;
}