in src/nccl_ofi_net.c [508:552]
/*
 * Look up libfabric providers, preferring ones that match the GPUDirect
 * hints and falling back to the non-GPUDirect hints when none are found.
 *
 * providers: output argument handed straight to fi_getinfo().
 *
 * Returns the result of the last fi_getinfo() call (0 on success), or
 * -FI_ENOMEM when either hints structure could not be allocated.
 *
 * Side effect: clears the file-level support_gdr flag when the GPUDirect
 * query reports -FI_ENODATA.
 */
static int find_ofi_provider(struct fi_info **providers)
{
	int rc = 0;
	struct fi_info *gdr_hints = fi_allocinfo();
	struct fi_info *hints = fi_allocinfo();

	if (!gdr_hints || !hints) {
		NCCL_OFI_WARN("Unable to allocate hints fi_info structure");
		rc = -FI_ENOMEM;
		goto cleanup;
	}

	/* First pass: ask for a provider matching the GPUDirect hints */
	get_hints(gdr_hints, true);
	rc = fi_getinfo(ofi_version, NULL, NULL, 0ULL, gdr_hints, providers);
	if (rc != -FI_ENODATA) {
		/* Either success or a hard failure; only the latter is logged */
		if (rc != 0) {
			NCCL_OFI_WARN("OFI call failed with RC %d, %s", rc, fi_strerror(-rc));
		}
		goto cleanup;
	}

	NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET,
		       "Could not find any optimal provider supporting GPUDirect RDMA");

	/* Record that transfers using GPU buffers are not supported */
	support_gdr = false;

	/* Second pass: retry with the non-GPUDirect hints */
	get_hints(hints, false);
	rc = fi_getinfo(ofi_version, NULL, NULL, 0ULL, hints, providers);
	if (rc != 0) {
		if (rc == -FI_ENODATA) {
			NCCL_OFI_WARN("Couldn't find any optimal provider");
		} else {
			NCCL_OFI_WARN("OFI call failed with RC %d, %s", rc, fi_strerror(-rc));
		}
	}

cleanup:
	/* Release whichever hints structures were successfully allocated */
	if (gdr_hints != NULL) {
		fi_freeinfo(gdr_hints);
	}
	if (hints != NULL) {
		fi_freeinfo(hints);
	}
	return rc;
}