static int listen()

in src/nccl_ofi_rdma.cpp [5181:5260]


/*
 * Create a listen communicator on an RDMA endpoint.
 *
 * Posts rx buffers on the endpoint, fills `handle` with the local endpoint
 * name of control rail 0 and a newly allocated communicator ID, registers the
 * new communicator in the device's comm lookup table, and prepares the
 * request used to receive connection requests.
 *
 * @param base_ep      Endpoint to listen on; cast to nccl_net_ofi_rdma_ep_t.
 * @param handle       Output. Zeroed, then filled with the endpoint name and
 *                     the communicator ID. May be left partially filled if
 *                     the function fails after the handle has been built.
 * @param listen_comm  Output. Written only on success.
 *
 * @return 0 on success;
 *         the error returned by post_rx_buffs() or prepare_recv_conn_req();
 *         -EINVAL if the endpoint name does not fit into the handle;
 *         -ENOMEM if the communicator or its ID cannot be allocated.
 */
static int listen(nccl_net_ofi_ep_t *base_ep,
			     nccl_net_ofi_conn_handle_t *handle,
			     nccl_net_ofi_listen_comm_t **listen_comm)
{
	int ret = 0;
	nccl_net_ofi_rdma_listen_comm_t *l_comm = NULL;
	size_t comm_id = 0;
	nccl_net_ofi_rdma_ep_t *ep =
		(nccl_net_ofi_rdma_ep_t *)base_ep;
	nccl_net_ofi_ep_rail_t *first_control_rail = rdma_endpoint_get_control_rail(ep, 0);

	/* Retrieve and validate device */
	nccl_net_ofi_rdma_device_t *device = rdma_endpoint_get_device(ep);
	assert(device != NULL);

	int dev_id = device->base.dev_id;

	ret = post_rx_buffs(ep);
	if (ret != 0) {
		NCCL_OFI_WARN("Error posting rx buffers: %d", ret);
		return ret;
	}

	/* Build handle */
	assert(sizeof(handle->ep_name) == sizeof(first_control_rail->local_ep_name));

	/* Reject an oversized name explicitly: the assert above vanishes in
	   release builds, and an oversized length would overflow the copy. */
	if (OFI_UNLIKELY(first_control_rail->local_ep_name_len > sizeof(handle->ep_name))) {
		NCCL_OFI_WARN("Endpoint name of control rail 0 does not fit in connection handle for dev %d",
			      dev_id);
		return -EINVAL;
	}

	/* Zeroing the whole handle up front also clears the bytes of ep_name
	   that the copy below leaves untouched. */
	memset(handle, 0, sizeof(*handle));

	/* We don't copy the size here since the handle doesn't have a size field.
	   The size will be distributed later by the connect response message. */
	memcpy(handle->ep_name, first_control_rail->local_ep_name,
	       first_control_rail->local_ep_name_len);

	/* Build listen_comm */
	l_comm = (nccl_net_ofi_rdma_listen_comm_t *)calloc(1,
							   sizeof(nccl_net_ofi_rdma_listen_comm_t));
	if (OFI_UNLIKELY(l_comm == NULL)) {
		NCCL_OFI_WARN("Couldn't allocate listen_comm for dev %d", dev_id);
		ret = -ENOMEM;
		goto error;
	}

	/* Initialize listen communicator */
	l_comm->base.base.type = NCCL_NET_OFI_LISTEN_COMM;
	l_comm->base.base.ep = base_ep;
	l_comm->base.base.dev_id = dev_id;
	l_comm->base.accept = accept;
	l_comm->base.close = listen_close;

	/* Allocate listen communicator ID */
	comm_id = device->comm_idpool->allocate_id();
	if (OFI_UNLIKELY(comm_id == FI_KEY_NOTAVAIL)) {
		NCCL_OFI_WARN("Couldn't allocate communicator ID for listen_comm on dev %d", dev_id);
		/* Mark the ID invalid so the error path does not release it */
		l_comm->comm_id = COMM_ID_INVALID;
		ret = -ENOMEM;
		goto error;
	}
	l_comm->comm_id = (uint32_t)comm_id;
	handle->comm_id = l_comm->comm_id;

	/*  Add listen comm to ep's lookup array */
	rdma_device_set_comm(device, l_comm->comm_id, &l_comm->base.base);

	/* Prepare receive request to accept connections */
	ret = prepare_recv_conn_req(l_comm);
	if (ret != 0) {
		/* Remove the lookup entry before l_comm is freed below so
		   the device does not keep a pointer to freed memory.
		   NOTE(review): assumes rdma_device_set_comm() accepts a NULL
		   comm to clear a slot - confirm against its definition. */
		rdma_device_set_comm(device, l_comm->comm_id, NULL);
		goto error;
	}

	*listen_comm = &l_comm->base;

	return 0;

error:
	/* Return the communicator ID to the pool if one was obtained */
	if (l_comm && COMM_ID_INVALID != l_comm->comm_id) {
		device->comm_idpool->free_id(l_comm->comm_id);
	}
	free(l_comm);
	return ret;
}