in src/nccl_ofi_rdma.cpp [2930:2994]
/**
 * @brief	Register a memory region on every rail of an RDMA domain.
 *
 * Allocates a memory registration handle together with its per-rail
 * `fid_mr` array, optionally reserves an MR key from the domain's key
 * pool (only when the pool reports a non-zero size; otherwise the key
 * stays at the zero value left by calloc), builds the registration
 * attributes through set_mr_req_attr() and then calls fi_mr_regattr()
 * once per rail with those same attributes.
 *
 * @param	domain
 *		RDMA domain whose rails the memory is registered on
 * @param	ckey
 *		Cache key reference describing the memory to register;
 *		forwarded unchanged to set_mr_req_attr()
 * @param	type
 *		Buffer type; forwarded unchanged to set_mr_req_attr()
 * @param	mhandle
 *		Output. Set to NULL on entry and to the new handle only
 *		on success
 *
 * @return	0 on success
 *		-ENOMEM if a handle allocation or the MR key allocation fails
 *		the non-zero code returned by set_mr_req_attr() or
 *		fi_mr_regattr() otherwise
 *
 * On every failure after the handle itself was allocated, the partially
 * built handle is passed to dereg_mr() for cleanup and *mhandle is left
 * NULL. The result of that cleanup call is intentionally ignored so the
 * original error code is the one reported to the caller.
 */
static inline int reg_mr_on_device(nccl_net_ofi_rdma_domain_t *domain,
				   nccl_ofi_mr_ckey_ref ckey,
				   int type,
				   nccl_net_ofi_rdma_mr_handle_t **mhandle)
{
	/*
	 * All locals with initializers are declared up front: the
	 * `goto error` statements below must not jump over an
	 * initialization at function scope.
	 */
	int ret = 0;
	nccl_net_ofi_rdma_mr_handle_t *ret_handle = NULL;
	struct fi_mr_attr mr_attr = {};
	uint64_t regattr_flags = 0;
	int num_rails = domain->num_rails;
	nccl_ofi_idpool_t *key_pool = domain->base.mr_rkey_pool;

	*mhandle = NULL;

	/* Allocate rdma memory registration handle */
	ret_handle = (nccl_net_ofi_rdma_mr_handle_t *)calloc(1, sizeof(nccl_net_ofi_rdma_mr_handle_t));
	if (OFI_UNLIKELY(!ret_handle)) {
		NCCL_OFI_WARN("Unable to allocate memory registration handle");
		return -ENOMEM;
	}

	/* One (initially NULL) fid_mr slot per rail */
	ret_handle->mr = (struct fid_mr **)calloc(num_rails, sizeof(struct fid_mr *));
	if (OFI_UNLIKELY(!ret_handle->mr)) {
		NCCL_OFI_WARN("Unable to allocate memory registration handles array");
		ret = -ENOMEM;
		goto error;
	}

	/* A key is only requested when the key pool is in use (non-zero size) */
	if (key_pool->get_size() != 0) {
		auto key = key_pool->allocate_id();
		if (OFI_UNLIKELY(key == FI_KEY_NOTAVAIL)) {
			NCCL_OFI_WARN("MR key allocation failed");
			ret = -ENOMEM;
			goto error;
		}
		ret_handle->mr_key = static_cast<uint64_t>(key);
	}

	/* Create memory registration request */
	ret = set_mr_req_attr(ret_handle->mr_key, ckey, &regattr_flags, type, &mr_attr);
	if (OFI_UNLIKELY(ret != 0)) {
		NCCL_OFI_WARN("Could not set registration request attributes, dev: %d",
			      rdma_domain_get_device(domain)->base.dev_id);
		goto error;
	}

	/*
	 * Register memory on each rail. The rail count is stored in the
	 * handle before the first registration so it is already valid when
	 * the error path hands the handle to dereg_mr().
	 */
	ret_handle->num_rails = num_rails;
	for (uint16_t rail_id = 0; rail_id < num_rails; ++rail_id) {
		nccl_net_ofi_rdma_domain_rail_t *domain_rail = rdma_domain_get_rail(domain, rail_id);

		ret = fi_mr_regattr(domain_rail->domain, &mr_attr,
				    regattr_flags, &ret_handle->mr[rail_id]);
		if (OFI_UNLIKELY(ret != 0)) {
			NCCL_OFI_WARN("Could not register memory on rail %u with flag %llu, ret: %d",
				      static_cast<unsigned int>(rail_id),
				      static_cast<unsigned long long>(regattr_flags),
				      ret);
			goto error;
		}
	}

	*mhandle = ret_handle;
	return 0;

error:
	/* Best-effort cleanup; keep reporting the original failure code */
	(void) dereg_mr(ret_handle, domain);
	return ret;
}