static int sendrecv_mr_buffers_register()

in src/nccl_ofi_sendrecv.cpp [632:732]


static int sendrecv_mr_buffers_register(struct fid_domain *domain,
					struct fid_ep *ep,
					nccl_ofi_idpool_t *key_pool,
					int dev_id,
					nccl_ofi_mr_ckey_ref ckey,
					int type,
					nccl_net_ofi_sendrecv_mr_handle_t **mr_handle)
{
	int ret = 0;
	struct fi_mr_attr mr_attr = {};
	uint64_t regattr_flags = 0;
	auto *ret_handle = new nccl_net_ofi_sendrecv_mr_handle_t{MR_KEY_INIT_VALUE, nullptr};

	mr_attr.access = FI_SEND | FI_RECV;
	nccl_ofi_mr_ckey_fill_mr_attrs(ckey, &mr_attr, &regattr_flags);
	switch (type) {
	case NCCL_PTR_HOST:
		if (support_fi_rma) {
			mr_attr.access |= FI_READ;
		}
		mr_attr.iface = FI_HMEM_SYSTEM;
		break;
#if HAVE_CUDA
	case NCCL_PTR_CUDA:
		if (support_fi_rma) {
			mr_attr.access |= FI_REMOTE_READ;
		}
		mr_attr.iface = FI_HMEM_CUDA;

		/* Get CUDA device ID */
		ret = nccl_net_ofi_get_cuda_device_for_addr((void *)nccl_ofi_mr_ckey_baseaddr(ckey),
		                                            &mr_attr.device.cuda);
		if (OFI_UNLIKELY(ret != 0)) {
			goto exit;
		}
		break;
#endif
#if HAVE_NEURON
	case NCCL_PTR_NEURON:
		mr_attr.access |= FI_REMOTE_READ;
		mr_attr.iface = FI_HMEM_NEURON;
		/*
		 * Store a sentinel; libfabric requires this to be initialized Libfabric
		 * requires the device.neuron field to be set for Neuron HMEM, but the EFA
		 * provider does not use the value.  Store an invalid device id sentinel to
		 * both follow the Libfabric spec and cause an error if a provider uses the
		 * value in the future.
		 */
		mr_attr.device.neuron = -1;
		break;
#endif
	default:
		ret = -EINVAL;
		goto exit;
	}

	if (key_pool->get_size() != 0) {
		size_t key = key_pool->allocate_id();
		if (OFI_UNLIKELY(key == FI_KEY_NOTAVAIL)) {
			NCCL_OFI_WARN("MR key allocation failed");
			ret = -ENOMEM;
			goto exit;
		}
		ret_handle->mr_key = static_cast<uint64_t>(key);
		mr_attr.requested_key = ret_handle->mr_key;
	}

	ret = fi_mr_regattr(domain, &mr_attr, regattr_flags, &ret_handle->mr);
	if (OFI_UNLIKELY(ret != 0)) {
		NCCL_OFI_WARN("Unable to register memory (type = %d) for device %d. RC: %d, Error: %s",
			      type, dev_id, ret, fi_strerror(-ret));
		goto exit;
	}

	if (endpoint_mr) {
		ret = fi_mr_bind(ret_handle->mr, &ep->fid, 0);
		if (OFI_UNLIKELY(ret != 0)) {
			NCCL_OFI_WARN("Unable to bind MR to EP (type = %d) for device %d. RC: %d, Error: %s",
				      type, dev_id, ret, fi_strerror(-ret));
			goto exit;
		}

		ret = fi_mr_enable(ret_handle->mr);
		if (OFI_UNLIKELY(ret != 0)) {
			NCCL_OFI_WARN("Unable to enable MR (type = %d) for device %d. RC: %d, Error: %s",
				      type, dev_id, ret, fi_strerror(-ret));
			goto exit;
		}
	}

	*mr_handle = ret_handle;
	return 0;
exit:
	if (ret_handle != nullptr) {
		sendrecv_comm_mr_base_dereg(ret_handle, key_pool, nullptr);
		ret_handle = nullptr;
	}

	*mr_handle = nullptr;
	return ret;
}