static int nccl_net_ofi_rdma_domain_create_endpoint()

in src/nccl_ofi_rdma.cpp [7255:7376]


/**
 * Create an RDMA endpoint on the given domain.
 *
 * Allocates a nccl_net_ofi_rdma_ep_t and initializes, in order: the base
 * endpoint, its operation table, the data/control rail arrays, the pending
 * request queue and lock, rx buffer sizing, the per-rail OFI resources, the
 * rx buffers, and finally the max-write-inline-size cache.
 *
 * @param base_domain  Domain to create the endpoint on; must be non-NULL.
 * @param base_ep      Output: set to the new endpoint's base on success.
 *                     Left untouched on failure.
 * @return 0 on success, negative errno-style code on failure.
 */
static int nccl_net_ofi_rdma_domain_create_endpoint(nccl_net_ofi_domain_t *base_domain,
						    nccl_net_ofi_ep_t **base_ep)
{
	int ret = 0;
	nccl_net_ofi_rdma_ep_t *ep = NULL;
	nccl_net_ofi_rdma_domain_t *domain = NULL;
	nccl_net_ofi_rdma_device_t *device = NULL;

	domain = (nccl_net_ofi_rdma_domain_t *)base_domain;
	if (OFI_UNLIKELY(domain == NULL)) {
		NCCL_OFI_WARN("Invalid domain provided");
		return -EINVAL;
	}
	assert(base_ep != NULL);

	device = rdma_domain_get_device(domain);
	assert(device != NULL);

	/* Allocate endpoint */
	ep = (nccl_net_ofi_rdma_ep_t *)calloc(1, sizeof(nccl_net_ofi_rdma_ep_t));
	if (!ep) {
		NCCL_OFI_WARN("Unable to allocate rdma endpoint");
		return -ENOMEM;
	}

	ret = nccl_net_ofi_endpoint_init(&domain->base, &ep->base);
	if (ret != 0) {
		NCCL_OFI_WARN("Initializing endpoint base failed");
		/* BUGFIX: ep->base.release_ep is not assigned yet (ep is
		 * calloc-zeroed), so jumping to the common error path below
		 * would call through a NULL function pointer. Tear down the
		 * bare allocation directly instead. */
		free(ep);
		return ret;
	}

	ep->base.listen = listen;
	ep->base.connect = connect;
	ep->base.release_ep = nccl_net_ofi_rdma_endpoint_release;
	ep->base.free_ep = nccl_net_ofi_rdma_endpoint_free;

	ep->num_rails = domain->num_rails;

	if (ofi_nccl_rdma_rr_ctrl_msg()) {
		/*
		 * Round robin the control message across all rails by using dedicated
		 * endpoints with CQs shared with the data endpoints.
		 */
		ep->num_control_rails = domain->num_rails;
	} else {
		/*
		 * Use a single rail for control messages, with a dedicated
		 * endpoint and a CQ shared with the data endpoint.
		 */
		ep->num_control_rails = 1;
	}

	ep->use_long_rkeys = device->use_long_rkeys;

	ep->rails = (nccl_net_ofi_ep_rail_t *)calloc(ep->num_rails,
		sizeof(nccl_net_ofi_ep_rail_t));
	if (!ep->rails) {
		NCCL_OFI_WARN("Unable to allocate rdma rails");
		ret = -ENOMEM;
		goto error;
	}

	ep->control_rails = (nccl_net_ofi_ep_rail_t *)calloc(ep->num_control_rails, sizeof(nccl_net_ofi_ep_rail_t));
	if (!ep->control_rails) {
		NCCL_OFI_WARN("Unable to allocate rdma control rails");
		ret = -ENOMEM;
		goto error;
	}

	/* NOTE(review): this `new` throws (or aborts under -fno-exceptions)
	 * instead of following the -ENOMEM convention used everywhere else
	 * in this function — confirm whether a nothrow allocation + check is
	 * wanted here. */
	ep->pending_reqs_queue = new std::deque<nccl_net_ofi_rdma_req_t *>;

	ret = nccl_net_ofi_mutex_init(&ep->pending_reqs_lock, NULL);
	if (ret != 0) {
		/* NOTE(review): strerror(ret) suggests a positive errno here,
		 * which would be returned as-is, unlike the negative codes
		 * used above — verify nccl_net_ofi_mutex_init's convention. */
		NCCL_OFI_WARN("Mutex initialization failed: %s", strerror(ret));
		goto error;
	}

	/* Ctrl rx buffers must be large enough to hold the largest message
	 * that arrives on the control path. */
	ep->ctrl_rx_buff_size = std::max({sizeof(nccl_net_ofi_rdma_ctrl_msg_t),
	    sizeof(nccl_ofi_rdma_connection_info_t),
	    sizeof(nccl_net_ofi_rdma_close_msg_t)});
	ep->eager_send_size = ofi_nccl_eager_max_size();
	/* Work around EFA provider bug around posting 0 byte rx buffers by not
	   posting 0 byte rx buffers.  Note that if eager_send_size is -1
	   (disabled), eager_rx_buff_size will also be -1. */
	ep->eager_rx_buff_size = (ep->eager_send_size == 0) ?
		EAGER_RX_BUFFER_ALIGNMENT : ep->eager_send_size;

	ep->is_endpoint_per_communicator_ep = false;

	ret = init_rail_ofi_resources(device, domain, ep);
	if (ret != 0) {
		goto error;
	}

	ret = init_rx_buffers(ep);
	if (ret != 0) {
		NCCL_OFI_WARN("Preparation of rx buffers failed");
		goto error;
	}

	/* During plugin initialization, this function is invoked the
	 * first time. Consequently, initialization function of
	 * maximum write inline size is executed on initialization
	 * path the first time, avoiding data race on
	 * `max_write_inline_size` when `get_properties()` function
	 * reads the maximum write inline size variable. */
	ret = init_max_write_inline_size_if_not_initialized(device, ep);
	if (ret != 0) {
		goto error;
	}

	NCCL_OFI_TRACE(NCCL_NET, "RDMA endpoint %p for dev #%d is created",
			ep,
			device->base.dev_id);

	/* BUGFIX: publish the endpoint only after every fallible step has
	 * succeeded; previously *base_ep was set before the final init call,
	 * leaving the caller with a dangling pointer if that call failed and
	 * the error path released the endpoint. */
	*base_ep = &ep->base;

error:
	if (ret != 0) {
		/* release_ep is guaranteed non-NULL here: every jump to this
		 * label occurs after the op table was populated. */
		ep->base.release_ep(&(ep->base), false, false);
	}

	return ret;
}