static inline int sendrecv_send_comm_create()

in src/nccl_ofi_sendrecv.cpp [2006:2112]


/*
 * @brief	Allocate and initialize a send communicator for the peer
 *		described by a connection handle.
 *
 * The function performs these steps, in order:
 *   1. Looks up the device that owns `ep`.
 *   2. Copies the remote endpoint name and the tag (comm_id) out of
 *      `handle` and checks that the tag lies in [1, device->max_tag].
 *   3. Inserts the remote endpoint name into the endpoint's address vector.
 *   4. Allocates the send communicator and fills in its callbacks, tag,
 *      local endpoint and remote address.
 *   5. Takes a connection-info element from `ep->conn_msg_fl`, stores the
 *      local endpoint name in it and records whether the remote name is
 *      identical to the local one (connect_to_self).
 *   6. Creates the communicator's request freelist.
 *
 * @param	handle
 *		Connection handle supplying the remote endpoint name and tag.
 * @param	ep
 *		Endpoint the communicator is created on.
 * @param	s_comm
 *		Output. Set to the new communicator on success and to NULL on
 *		every failure path.
 *
 * @return	0 on success
 *		-EINVAL if the device cannot be accessed, the tag is out of
 *		range, or the address vector insertion fails
 *		-ENOMEM if the communicator or the connection-info element
 *		cannot be allocated
 *		the non-zero code returned by fi_getname() or
 *		nccl_ofi_freelist_init() if either of them fails
 *
 * NOTE(review): the address inserted into the AV in step 3 is not removed
 * when a later step fails; confirm whether that is intentional.
 */
static inline int sendrecv_send_comm_create(nccl_net_ofi_conn_handle_t *handle,
					    nccl_net_ofi_sendrecv_ep_t *ep,
					    nccl_net_ofi_sendrecv_send_comm_t **s_comm)
{
	/*
	 * All locals are declared before the first `goto` so that no jump
	 * crosses an initialization.
	 */
	char remote_ep_addr[MAX_EP_ADDR] = {};
	uint64_t tag = 0ULL;
	uint64_t max_tag = 0;
	size_t req_size = sizeof(nccl_net_ofi_sendrecv_req_t);
	fi_addr_t remote_addr;
	nccl_net_ofi_sendrecv_send_comm_t *ret_s_comm = NULL;
	nccl_ofi_connection_info_t *conn_info = NULL;
	*s_comm = NULL;
	int ret = 0;

	/* Retrieve and validate device */
	nccl_net_ofi_sendrecv_device_t *device = sendrecv_endpoint_get_device(ep);
	if (OFI_UNLIKELY(device == NULL)) {
		NCCL_OFI_WARN("Error accessing device.");
		return -EINVAL;
	}

	max_tag = device->max_tag;

	/* Get tag and remote name from handle */
	memcpy(&remote_ep_addr, handle->ep_name, MAX_EP_ADDR);
	memcpy(&tag, &handle->comm_id, sizeof(handle->comm_id));
	if (tag < 1 || tag > max_tag) {
		NCCL_OFI_WARN("Received an invalid tag %lu for device %d", tag,
			      device->base.dev_id);
		return -EINVAL;
	}

	/* Insert remote address into AV; exactly one address must be inserted */
	ret = fi_av_insert(ep->av,
			   (void *)remote_ep_addr, 1,
			   &remote_addr, 0, NULL);
	if (OFI_UNLIKELY(ret != 1)) {
		NCCL_OFI_WARN("Unable to insert remote address into address vector for device %d. RC: %d",
			      device->base.dev_id, ret);
		return -EINVAL;
	}

	/*
	 * Allocate and initialize send_comm. calloc() zeroes the structure,
	 * so conn_info starts out NULL, which the error path relies on.
	 */
	ret_s_comm = (nccl_net_ofi_sendrecv_send_comm_t *)
		calloc(1, sizeof(nccl_net_ofi_sendrecv_send_comm_t));
	if (OFI_UNLIKELY(ret_s_comm == NULL)) {
		NCCL_OFI_WARN("Couldn't allocate send_comm for dev %d", device->base.dev_id);
		return -ENOMEM;
	}

	ret_s_comm->base.base.type = NCCL_NET_OFI_SEND_COMM;
	ret_s_comm->base.base.ep = &ep->base;
	ret_s_comm->base.base.dev_id = device->base.dev_id;
	ret_s_comm->base.regMr = sendrecv_send_comm_reg_mr;
	ret_s_comm->base.deregMr = sendrecv_send_comm_dereg_mr;
	ret_s_comm->base.send = sendrecv_send_comm_send;
	ret_s_comm->base.close = sendrecv_send_comm_close;
	ret_s_comm->base.write = NULL;
	ret_s_comm->base.write_inline = NULL;
	ret_s_comm->tag = tag;
	ret_s_comm->local_ep = ep->ofi_ep;
	ret_s_comm->remote_ep = remote_addr;

	/* Connection info lives in an element of the endpoint's freelist */
	ret_s_comm->conn_info = nccl_ofi_freelist_entry_alloc(ep->conn_msg_fl);
	if (ret_s_comm->conn_info == NULL) {
		NCCL_OFI_WARN("Could not allocate connect connection info");
		ret = -ENOMEM;
		goto error;
	}

	conn_info = (nccl_ofi_connection_info_t *)ret_s_comm->conn_info->ptr;

	/* In: capacity of the name buffer. Out: actual name length. */
	conn_info->ep_namelen = sizeof(conn_info->ep_name);

	ret = fi_getname(&(ep->ofi_ep->fid),
			 (void *)conn_info->ep_name,
			 &conn_info->ep_namelen);
	if (ret == -FI_ETOOSMALL) {
		NCCL_OFI_WARN("Endpoint's address length (%zu) is larger than supplied buffer length (%d)",
			      conn_info->ep_namelen, MAX_EP_ADDR);
		goto error;
	} else if (ret != 0) {
		NCCL_OFI_WARN("Call to fi_getname() failed with RC: %d, ERROR: %s",
			      ret, fi_strerror(-ret));
		goto error;
	}

	/* The peer is ourselves when the remote name equals the local name */
	conn_info->connect_to_self =
		(0 == memcmp(conn_info->ep_name, remote_ep_addr, conn_info->ep_namelen)) ? 1 : 0;

	/* Pre-allocated buffers for data path */
	ret = nccl_ofi_freelist_init(req_size, 16, 16, NCCL_OFI_MAX_SEND_REQUESTS,
				     sendrecv_fl_req_entry_init, NULL,
				     &ret_s_comm->nccl_ofi_reqs_fl);
	if (OFI_UNLIKELY(ret != 0)) {
		NCCL_OFI_WARN("Could not allocate NCCL OFI requests free list for dev %d",
			      device->base.dev_id);
		goto error;
	}

	*s_comm = ret_s_comm;
	return 0;

error:
	/*
	 * Reached only with ret != 0 and ret_s_comm allocated. Hand the
	 * connection-info element back to the endpoint's freelist before
	 * releasing the communicator; otherwise the element is leaked because
	 * the only reference to it lives inside ret_s_comm.
	 */
	if (ret_s_comm->conn_info != NULL) {
		nccl_ofi_freelist_entry_free(ep->conn_msg_fl, ret_s_comm->conn_info);
	}
	free(ret_s_comm);

	return ret;
}