in Benchmarks/NVIDIA/NCCLBandwidth.py [0:0]
def run(self):
    """Run the NCCL all-reduce benchmark and record bandwidth per message size.

    Steps performed:
      1. Count the GPUs by counting the lines ``nvidia-smi`` prints.
      2. Launch ``./build/all_reduce_perf`` (path is relative to the current
         working directory) sweeping message sizes from 8 bytes to 8G with a
         doubling factor, keeping only the ``float`` result rows.
      3. Hand the completed process to ``tools.check_error`` and log what it
         returns via ``tools.write_log``.
      4. Extract one bandwidth figure per result row, print a table, store
         ``[size_labels, bandwidths]`` on ``self.buffer`` and call
         ``self.save()``.

    The working directory captured on entry is restored on exit, even when a
    step raises.

    NOTE(review): ``self.algo`` is assigned here only when exactly 4 GPUs are
    detected; for any other count it must already have been set elsewhere
    (not visible in this block) -- confirm against the class initializer.
    """
    current = os.getcwd()
    try:
        # Row labels for the sweep; one label per step of "-b 8 -e 8G -f 2".
        buffer = [["8 ", "16 ", "32 ", "64 ", "128 ", "256 ", "512 ", "1K", "2K", "4K", "8K", "16K", "32K", "65K", "132K", "256K", "524K", "1M", "2M", "4M", "8M", "16M", "33M", "67M", "134M", "268M", "536M", "1G", "2G", "4G", "8G"]]

        # nvidia-smi prints one line per GPU, so the line count is the GPU count.
        gpu_query = subprocess.run(
            "nvidia-smi --query-gpu=name --format=csv,noheader | wc -l",
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        num_gpus = gpu_query.stdout.decode('utf-8').strip()
        if num_gpus == '4':
            self.algo = "Ring"

        print("Running NCCL AllReduce on " + num_gpus + " GPUs")
        results = subprocess.run(
            'NCCL_ALGO=' + self.algo + ' ./build/all_reduce_perf -b 8 -e 8G -f 2 -g ' + num_gpus + ' -n 40 | grep float',
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        tools.write_log(tools.check_error(results))

        # Only complete 13-field rows are result rows; field 11 is the value
        # reported. NOTE(review): presumably the in-place bus bandwidth column
        # of the nccl-tests output -- confirm against the tool's column layout.
        log = [
            fields[11]
            for fields in map(str.split, results.stdout.decode('utf-8').split('\n'))
            if len(fields) == 13
        ]
        buffer.append(log)

        # PrettyTable rejects columns of unequal length, so a failed or
        # truncated run used to raise before anything was saved. Pad (or trim)
        # only the displayed column; the saved buffer keeps the raw results.
        sizes = buffer[0]
        shown = log[:len(sizes)] + ["N/A"] * (len(sizes) - len(log))
        table1 = PrettyTable()
        table1.add_column("Message Size", sizes)
        table1.add_column("Bandwidth (" + self.algo + ")", shown)
        print(table1)

        self.buffer = buffer
        self.save()
    finally:
        # Always return to the directory the caller was in.
        os.chdir(current)