ncclResult_t region_get_coll_info_internal_v2()

in src/tuner/nccl_ofi_regions.cpp [917:980]


/*
 * Region-based algorithm/protocol selection for the V2 tuner interface.
 *
 * Builds the point (nBytes, num_ranks) and walks the regions registered for
 * collType until one reports the point as a match. On a match, *algorithm and
 * *protocol are overwritten with that region's values; otherwise they are left
 * untouched so that NCCL's internal tuner makes the decision.
 *
 * @param ctx             Tuner context; ctx->type_ctx is read as the region context.
 * @param collType        Collective type; used as the index into the per-collective
 *                        region tables.
 * @param nBytes          Message size; becomes the x coordinate of the lookup point.
 * @param collNetSupport  Not used by this function.
 * @param nvlsSupport     When 0, regions that select NCCL_ALGO_NVLS_TREE are skipped.
 * @param numPipeOps      Not used by this function.
 * @param algorithm       Out: algorithm of the matching region (written only on a match).
 * @param protocol        Out: protocol of the matching region (written only on a match).
 * @param nChannels       Not used by this function.
 *
 * @return ncclSuccess on every path; "no decision" is signalled by leaving the
 *         output parameters unchanged, not by an error code.
 */
ncclResult_t region_get_coll_info_internal_v2(nccl_ofi_tuner_context_t *ctx,
					      ncclFunc_t collType,
					      size_t nBytes,
					      int collNetSupport,
					      int nvlsSupport,
					      int numPipeOps,
					      int *algorithm,
					      int *protocol,
					      int *nChannels)
{
	ncclResult_t ret = ncclSuccess;
	nccl_ofi_tuner_region_context_t *region_ctx = (nccl_ofi_tuner_region_context_t *)ctx->type_ctx;
	/* Negative means "no region matched yet"; the loop stops once this is >= 0. */
	int in_out = -1;
	nccl_ofi_tuner_point_t p;

	if (region_ctx == NULL || region_ctx->regions[collType] == NULL) {
		/* we do not update cost table. Fall back to NCCL's tuner */
		NCCL_OFI_INFO(NCCL_TUNING, "Region Context is not ready. Fall back to NCCL's tuner.");
		goto exit;
	}

	/* Skip when two nodes or lesser because the regions are not well defined and fallback
	 * to NCCL's internal tunings */
	if (region_ctx->dims.num_nodes <= 2) {
		goto exit;
	}

	/* Lookup point: x is the message size, y is the number of ranks. */
	p.x = (double)nBytes;
	p.y = (double)region_ctx->dims.num_ranks;

	/* Check all regions, stopping at the first one that matches */
	for (size_t i = 0; i < region_ctx->num_regions[collType] && in_out < 0; i++) {
		/* PAT is not supported in V2 tuner: never pick a PAT region. If no other
		 * region matches, the decision falls back to NCCL's internal tuner. */
		if (region_ctx->regions[collType][i].algorithm == NCCL_ALGO_PAT) {
			continue;
		}
		/* NVLS_TREE regions are only eligible when the caller reports NVLS support. */
		if (region_ctx->regions[collType][i].algorithm == NCCL_ALGO_NVLS_TREE && nvlsSupport == 0) {
			continue;
		}

		/* NOTE(review): a non-negative result is treated as a match here; confirm the
		 * exact return convention against is_inside_region(). */
		in_out = is_inside_region(p, &region_ctx->regions[collType][i]);
		if (in_out >= 0) {
			*algorithm = region_ctx->regions[collType][i].algorithm;
			*protocol = region_ctx->regions[collType][i].protocol;

			/* The region tuner has no cost model, so the cost is logged as 0.0.
			 * nBytes is a size_t and must be formatted with %zu. */
			NCCL_OFI_INFO(NCCL_TUNING,
					"Region TUner choosing algo %d proto %d with cost %.8f µsecs for coll %d size %zu.",
					*algorithm,
					*protocol,
					0.0,
					collType,
					nBytes);
		}
	}

	if (in_out < 0) {
		/* nBytes is a size_t and must be formatted with %zu. */
		NCCL_OFI_INFO(NCCL_TUNING, "Falling back to NCCL's tuner for coll %d size %zu.", collType, nBytes);
	}

exit:
	return ret;
}