include/internal/tuner/nccl_defaults.h (15 lines of code) (raw):
/*
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef NCCL_DEFAULTS_H_
#define NCCL_DEFAULTS_H_
/* Ideally, this file will be dropped in the future, once these parameters are
* provided by the tuner when called. For now, these constants are needed to
* adjust ring latency costs. */
/**
* @brief Step count assumed for NCCL protocols (8, as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_STEPS 8ULL
/**
* @brief Assumed size of the NCCL_LL_FIFOLINE structure (16, as unsigned
*        long long).
*/
#define NCCL_OFI_TUNER_NCCL_SIZEOF_NCCL_LL_FIFOLINE 16ULL
/**
* @brief CUDA warp size (32, as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_WARP_SIZE 32ULL
/**
* @brief Upper bound on the number of NCCL channels (32, as unsigned long
*        long).
*/
#define NCCL_OFI_TUNER_NCCL_MAXCHANNELS 32ULL
/**
* @brief Upper bound on the thread count across NCCL protocols (640, as
*        unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_MAX_NTHREADS 640ULL
/**
* @brief Upper bound on the thread count for the NCCL_PROTO_SIMPLE protocol
*        (512, as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_SIMPLE_MAX_NTHREADS 512ULL
/**
* @brief Upper bound on the thread count for the NCCL_PROTO_LL protocol
*        (512, as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_LL_MAX_NTHREADS 512ULL
/**
* @brief Lines handled per thread in the NCCL_PROTO_LL protocol (8, as
*        unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_LL_LINES_PER_THREAD 8ULL
/**
* @brief Upper bound on the thread count for the NCCL_PROTO_LL128 protocol
*        (640, as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_LL128_MAX_NTHREADS 640ULL
/**
* @brief Elements handled per thread in the NCCL_PROTO_LL128 protocol (120,
*        as unsigned long long).
*/
#define NCCL_OFI_TUNER_NCCL_LL128_ELEMS_PER_THREAD 120ULL
/**
* @brief Data type size the tuner expects (4, as unsigned long long).
*/
#define NCCL_OFI_TUNER_EXPECTED_DTYPE_SIZE 4ULL
/**
* @brief Buffer size for NCCL_PROTO_SIMPLE protocol (1 << 22 = 4194304).
*
* Typed unsigned long long like the other constants in this header, so
* arithmetic on this value alone (e.g. scaling by a channel count) is
* performed in 64-bit unsigned rather than signed int.
*/
#define NCCL_OFI_TUNER_NCCL_BUFFSIZE (1ULL << 22)
#endif // NCCL_DEFAULTS_H_