in src/tuner/nccl_ofi_regions.cpp [917:980]
/**
 * Pick an algorithm/protocol pair for a collective from the tuner's regions
 * (V2 tuner interface).
 *
 * The collective is mapped to the point (nBytes, num_ranks) and tested against
 * each region registered for collType, in order. The first eligible region for
 * which is_inside_region() reports a non-negative result supplies *algorithm
 * and *protocol. When no region matches, the outputs are left untouched so the
 * caller keeps NCCL's own tuning.
 *
 * @param ctx            Tuner context; its type_ctx holds the region context.
 *                       A NULL ctx is treated like a missing region context.
 * @param collType       Collective being tuned; indexes the per-collective
 *                       region tables.
 * @param nBytes         Message size, used as the x coordinate of the point.
 * @param collNetSupport Not used by this function.
 * @param nvlsSupport    When 0, NCCL_ALGO_NVLS_TREE regions are skipped.
 * @param numPipeOps     Not used by this function.
 * @param algorithm      Out: algorithm of the matching region (written only
 *                       on a match).
 * @param protocol       Out: protocol of the matching region (written only
 *                       on a match).
 * @param nChannels      Not written by this function.
 *
 * @return ncclSuccess in every case; "no decision" is signalled by leaving
 *         the output parameters unmodified.
 */
ncclResult_t region_get_coll_info_internal_v2(nccl_ofi_tuner_context_t *ctx,
					      ncclFunc_t collType,
					      size_t nBytes,
					      int collNetSupport,
					      int nvlsSupport,
					      int numPipeOps,
					      int *algorithm,
					      int *protocol,
					      int *nChannels)
{
	ncclResult_t ret = ncclSuccess;
	nccl_ofi_tuner_region_context_t *region_ctx = NULL;
	int in_out = -1;
	nccl_ofi_tuner_point_t p;

	/* Only dereference ctx once we know it is valid; a NULL ctx takes the
	 * same fall-back path as a missing region context. */
	if (ctx != NULL) {
		region_ctx = (nccl_ofi_tuner_region_context_t *)ctx->type_ctx;
	}

	if (region_ctx == NULL || region_ctx->regions[collType] == NULL) {
		/* we do not update cost table. Fall back to NCCL's tuner */
		NCCL_OFI_INFO(NCCL_TUNING, "Region Context is not ready. Fall back to NCCL's tuner.");
		goto exit;
	}

	/* Skip when two nodes or lesser because the regions are not well defined and fallback
	 * to NCCL's internal tunings */
	if (region_ctx->dims.num_nodes <= 2) {
		goto exit;
	}

	p.x = (double)nBytes;
	p.y = (double)region_ctx->dims.num_ranks;

	/* Check all regions; the loop stops at the first one that contains p */
	for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {
		/* PAT is not supported in V2 tuner, in this case revert to nccl internal tuner */
		if (region_ctx->regions[collType][i].algorithm == NCCL_ALGO_PAT) {
			continue;
		}

		/* NVLS tree regions are only eligible when the caller reports NVLS support */
		if (region_ctx->regions[collType][i].algorithm == NCCL_ALGO_NVLS_TREE && nvlsSupport == 0) {
			continue;
		}

		in_out = is_inside_region(p, &region_ctx->regions[collType][i]);
		if (in_out >= 0) {
			*algorithm = region_ctx->regions[collType][i].algorithm;
			*protocol = region_ctx->regions[collType][i].protocol;

			/* The cost field is a fixed 0.0 placeholder; nBytes is size_t, hence %zu */
			NCCL_OFI_INFO(NCCL_TUNING,
				      "Region TUner choosing algo %d proto %d with cost %.8f µsecs for coll %d size %zu.",
				      *algorithm,
				      *protocol,
				      0.0,
				      collType,
				      nBytes);
		}
	}

	if (in_out < 0) {
		NCCL_OFI_INFO(NCCL_TUNING, "Falling back to NCCL's tuner for coll %d size %zu.", collType, nBytes);
	}

exit:
	return ret;
}