cmake/build_utils.py:

# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os
import subprocess
import sys


def is_mx_cuda():
    try:
        from mxnet import runtime
        features = runtime.Features()
        return features.is_enabled('CUDA')
    except Exception:
        if 'linux' in sys.platform:
            try:
                import mxnet as mx
                mx_libs = mx.libinfo.find_lib_path()
                for mx_lib in mx_libs:
                    output = subprocess.check_output(['readelf', '-d', mx_lib])
                    if 'cuda' in str(output):
                        return True
                return False
            except Exception:
                return False
        return False


def is_mx_mkldnn():
    try:
        from mxnet import runtime
        features = runtime.Features()
        return features.is_enabled('MKLDNN')
    except Exception:
        msg = 'INFO: Cannot detect if MKLDNN is enabled in MXNet. Please ' \
              'set MXNET_USE_MKLDNN=1 if MKLDNN is enabled in your MXNet build.'
        if 'linux' not in sys.platform:
            # MKLDNN is only enabled by default in MXNet Linux build. Return
            # False by default for non-linux build but still allow users to
            # enable it by using MXNET_USE_MKLDNN env variable.
            print(msg)
            return os.environ.get('MXNET_USE_MKLDNN', '0') == '1'
        else:
            try:
                import mxnet as mx
                mx_libs = mx.libinfo.find_lib_path()
                for mx_lib in mx_libs:
                    output = subprocess.check_output(['readelf', '-d', mx_lib])
                    if 'mkldnn' in str(output):
                        return True
                return False
            except Exception:
                print(msg)
                return os.environ.get('MXNET_USE_MKLDNN', '0') == '1'


def get_nvcc_bin():
    cuda_home = os.environ.get('HOROVOD_CUDA_HOME', '/usr/local/cuda')
    cuda_nvcc = os.path.join(cuda_home, 'bin', 'nvcc')
    for nvcc_bin in ['nvcc', cuda_nvcc]:
        try:
            subprocess.check_output([nvcc_bin, '--version'])
            return nvcc_bin
        except Exception:
            pass
    raise RuntimeError('Cannot find `nvcc`. `nvcc` is required to build Horovod with GPU operations. '
                       'Make sure it is added to your path or in $HOROVOD_CUDA_HOME/bin.')


def get_nvcc_flags():
    default_flags = ['--std=c++11', '-O3', '-Xcompiler', '-fPIC']
    cc_list_env = os.environ.get('HOROVOD_BUILD_CUDA_CC_LIST')

    # Invoke nvcc and extract all supported compute capabilities for CUDA toolkit version
    nvcc_bin = get_nvcc_bin()
    full_cc_list = subprocess.check_output(f"{nvcc_bin} --help | "
                                           f"sed -n -e '/gpu-architecture <arch>/,/gpu-code <code>/ p' | "
                                           f"sed -n -e '/Allowed values/,/gpu-code <code>/ p' | "
                                           f"grep -i sm_ | "
                                           f"grep -Eo 'sm_[0-9]+' | "
                                           f"sed -e s/sm_//g | "
                                           f"sort -g -u | "
                                           f"tr '\n' ' '",
                                           shell=True).strip().split()
    full_cc_list = [int(i) for i in full_cc_list]

    # Build native kernels for specified compute capabilities
    cc_list = full_cc_list if cc_list_env is None else [int(x) for x in cc_list_env.split(',')]
    for cc in cc_list:
        default_flags += ['-gencode', 'arch=compute_{cc},code=sm_{cc}'.format(cc=cc)]

    # Build PTX for maximum specified compute capability
    default_flags += ['-gencode', 'arch=compute_{cc},code=compute_{cc}'.format(cc=max(cc_list))]

    return default_flags
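
For reference, a minimal usage sketch of these helpers. This is illustrative only and assumes build_utils is importable from the caller's path; the real call sites live in Horovod's setup/CMake glue, which is not shown here.

# Illustrative only -- not the actual Horovod call site.
import build_utils

if build_utils.is_mx_cuda():
    # Flags include one -gencode pair per detected (or HOROVOD_BUILD_CUDA_CC_LIST-specified)
    # compute capability, plus a PTX target for the highest one.
    print('nvcc flags:', ' '.join(build_utils.get_nvcc_flags()))

if build_utils.is_mx_mkldnn():
    print('MXNet build has MKLDNN enabled')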