in horovod/torch/tensor_util.h [55:114]
static void ResizeNd(T* tensor, int nDimension, int64_t* size,
int64_t* stride);
// Primary declaration of TensorUtil::Copy, parameterized on the Horovod data
// type (DT), the device kind (Dev) and the concrete tensor type (T).
// No generic body is given here; per-type explicit specializations are
// declared by TENSOR_UTIL_DEFINE_TYPE_H further down in this header.
// NOTE(review): presumably copies the contents of `tensor` into `output` --
// confirm against the specialization definitions.
template <DataType DT, DeviceType Dev, class T>
static void Copy(T* output, T* tensor);
// Primary declaration of TensorUtil::DivideTensorInPlace, parameterized on the
// Horovod data type (DT), the device kind (Dev) and the concrete tensor type
// (T). Explicit specializations are declared by TENSOR_UTIL_DEFINE_TYPE_H
// further down in this header.
// NOTE(review): presumably divides every element of `tensor` by `value`,
// modifying `tensor` itself -- confirm against the specialization definitions
// (including how integer element types are handled).
template <DataType DT, DeviceType Dev, class T>
static void DivideTensorInPlace(T* tensor, int value);
#if HAVE_GPU
// Primary declaration of TensorUtil::CopyCPUToCuda; only visible when HAVE_GPU
// is set (see the enclosing #if).
// Template type parameters are ordered <DT, T, TC>, i.e. CPU tensor type first
// and CUDA tensor type second, matching the (cpu, cuda) argument order. This
// is the reverse of AsyncCopyCudaToCPU, so explicit specializations must list
// the types in the right order.
// NOTE(review): presumably copies from `cpu` into `cuda` -- confirm against
// the specialization definitions.
template <DataType DT, class T, class TC>
static void CopyCPUToCuda(T* cpu, TC* cuda);
// Primary declaration of TensorUtil::AsyncCopyCudaToCPU; only visible when
// HAVE_GPU is set (see the enclosing #if).
// Template type parameters are ordered <DT, TC, T>, i.e. CUDA tensor type
// first and CPU tensor type second, matching the (cuda, cpu) argument order.
// NOTE(review): the name suggests the copy from `cuda` into `cpu` may not have
// completed when this returns -- confirm what synchronization callers need
// before reading `cpu`.
template <DataType DT, class TC, class T>
static void AsyncCopyCudaToCPU(TC* cuda, T* cpu);
#endif
};
// Declares (without defining) the explicit specializations of the TensorUtil
// member templates for one (HorovodType, DeviceType, THTensor) combination:
//   GetShape, GetData, GetSize, GetDevice, New, Free, ResizeNd, Copy and
//   DivideTensorInPlace.
//
// Parameters:
//   HorovodType - value used as the first (data type) template argument.
//   DeviceType  - value used as the second (device) template argument. Note
//                 that this macro parameter has the same spelling as the
//                 DeviceType type, so within the macro body the token refers
//                 to the argument; callers pass an enumerator such as
//                 DeviceType::CPU or DeviceType::GPU.
//   THTensor    - concrete tensor type used as the third template argument
//                 and in the function signatures.
//
// The expansion already ends with a semicolon. Every signature here must stay
// token-for-token compatible with the primary templates in TensorUtil and with
// the specialization definitions provided elsewhere.
#define TENSOR_UTIL_DEFINE_TYPE_H(HorovodType, DeviceType, THTensor) \
template <> \
const TensorShape TensorUtil::GetShape<HorovodType, DeviceType, THTensor>( \
THTensor * tensor); \
template <> \
const void* TensorUtil::GetData<HorovodType, DeviceType, THTensor>( \
THTensor * tensor); \
template <> \
int64_t TensorUtil::GetSize<HorovodType, DeviceType, THTensor>(THTensor * \
tensor); \
template <> \
int TensorUtil::GetDevice<HorovodType, DeviceType, THTensor>(THTensor * \
tensor); \
\
template <> \
THTensor* TensorUtil::New<HorovodType, DeviceType, THTensor>(int device); \
template <> \
void TensorUtil::Free<HorovodType, DeviceType, THTensor>(THTensor * tensor); \
template <> \
void TensorUtil::ResizeNd<HorovodType, DeviceType, THTensor>( \
THTensor * tensor, int nDimension, int64_t* size, int64_t* stride); \
template <> \
void TensorUtil::Copy<HorovodType, DeviceType, THTensor>(THTensor * output, \
THTensor * tensor); \
template <> \
void TensorUtil::DivideTensorInPlace<HorovodType, DeviceType, THTensor>( \
THTensor * tensor, int value);
// Convenience wrapper: declares the TensorUtil specializations for a CPU
// tensor type by forwarding to TENSOR_UTIL_DEFINE_TYPE_H with the device
// argument fixed to DeviceType::CPU.
//   HorovodType - data type template argument.
//   THTensor    - concrete CPU tensor type.
#define TENSOR_UTIL_DEFINE_CPU_TYPE_H(HorovodType, THTensor) \
TENSOR_UTIL_DEFINE_TYPE_H(HorovodType, DeviceType::CPU, THTensor)
// Declares the TensorUtil specializations for a CUDA tensor type:
//   1. the common set from TENSOR_UTIL_DEFINE_TYPE_H, with the device argument
//      fixed to DeviceType::GPU and THCTensor as the tensor type;
//   2. the two host<->device copy helpers, CopyCPUToCuda and
//      AsyncCopyCudaToCPU, pairing THCTensor with its CPU counterpart
//      THTensor.
//
// Parameters:
//   HorovodType - data type template argument.
//   THCTensor   - concrete CUDA tensor type.
//   THTensor    - concrete CPU tensor type used by the copy helpers.
//
// The template argument order differs between the two helpers
// (<HorovodType, THTensor, THCTensor> vs <HorovodType, THCTensor, THTensor>),
// mirroring the primary declarations in TensorUtil. Those primary declarations
// exist only under #if HAVE_GPU, so this macro is only usable in builds where
// HAVE_GPU is set.
#define TENSOR_UTIL_DEFINE_CUDA_TYPE_H(HorovodType, THCTensor, THTensor) \
TENSOR_UTIL_DEFINE_TYPE_H(HorovodType, DeviceType::GPU, THCTensor) \
\
template <> \
void TensorUtil::CopyCPUToCuda<HorovodType, THTensor, THCTensor>( \
THTensor * cpu, THCTensor * cuda); \
template <> \
void TensorUtil::AsyncCopyCudaToCPU<HorovodType, THCTensor, THTensor>( \
THCTensor * cuda, THTensor * cpu);
#define TENSOR_UTIL_DEFINE_CPU_TYPE(HorovodType, THTensor, THStorage) \
template <> \
const TensorShape TensorUtil::GetShape<HorovodType, DeviceType::CPU, \
THTensor>(THTensor * tensor) { \