def run()

in Benchmarks/NVIDIA/NCCLBandwidth.py [0:0]


    def run(self):
        """Run the NCCL all-reduce benchmark and print/save bandwidth per message size.

        Steps performed:
          1. Count GPUs by counting the lines printed by ``nvidia-smi``.
          2. Run ``./build/all_reduce_perf`` (relative to the current working
             directory) over message sizes 8 B .. 8 G, doubling each step,
             with ``NCCL_ALGO`` set to ``self.algo``.
          3. Collect the 12th whitespace-separated field of every 13-field
             output line that matched ``grep float``.
          4. Print a two-column table, store the data in ``self.buffer`` and
             call ``self.save()``.

        ``self.algo`` is overwritten with ``"Ring"`` only when exactly four
        GPUs are detected; in every other case it must already be set on the
        instance before this method is called.

        The working directory captured on entry is restored on exit, even if
        a step raises.
        """
        current = os.getcwd()
        try:
            # Hard-coded row labels for the 31 sizes produced by "-b 8 -e 8G -f 2".
            buffer = [["8 ","16 ","32 ","64 ","128 ","256 ","512 ","1K","2K","4K","8K","16K","32K","65K","132K","256K", "524K","1M","2M","4M","8M","16M","33M","67M","134M","268M","536M","1G","2G","4G","8G"]]

            # One line per GPU is printed by nvidia-smi; `wc -l` turns that into a count.
            # NOTE(review): if nvidia-smi is missing, `wc -l` still prints "0" and the
            # benchmark below is launched with "-g 0" -- confirm whether that should abort.
            gpu_query = subprocess.run(
                "nvidia-smi --query-gpu=name --format=csv,noheader | wc -l",
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            num_gpus = gpu_query.stdout.decode('utf-8').strip()
            if num_gpus == '4':
                self.algo = "Ring"

            print("Running NCCL AllReduce on " + num_gpus + " GPUs")

            results = subprocess.run(
                'NCCL_ALGO=' + self.algo + ' ./build/all_reduce_perf -b 8 -e 8G -f 2 -g ' + num_gpus + ' -n 40 | grep float',
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            tools.write_log(tools.check_error(results))

            # Keep only complete 13-field result rows; field index 11 is the value
            # reported as "Bandwidth" (presumably the in-place bus bandwidth column
            # of the nccl-tests output -- confirm against the tool's header line).
            log = []
            for raw_line in results.stdout.decode('utf-8').split('\n'):
                fields = raw_line.split()
                if len(fields) == 13:
                    log.append(fields[11])

            buffer.append(log)

            # PrettyTable rejects columns of unequal length, so a failed or
            # truncated benchmark run would otherwise end in a traceback here.
            # Pad only the displayed columns; self.buffer keeps the raw data.
            expected_rows = len(buffer[0])
            if len(log) != expected_rows:
                print("Warning: expected " + str(expected_rows) + " bandwidth rows but parsed "
                      + str(len(log)) + "; missing entries are shown as N/A")
            table_rows = max(len(column) for column in buffer)

            table1 = PrettyTable()
            runs = ["Message Size", "Bandwidth (" + self.algo + ")"]
            for title, column in zip(runs, buffer):
                table1.add_column(title, column + ["N/A"] * (table_rows - len(column)))
            print(table1)

            self.buffer = buffer
            self.save()
        finally:
            # Nothing visible in this method changes directory; the restore is kept
            # (and made exception-safe) in case self.save() or a helper does.
            os.chdir(current)