def run_benchmark()

in Benchmarks/AMD/LLMBenchmark.py

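Runs vLLM's offline throughput benchmark (benchmarks/benchmark_throughput.py) inside the AMD container for every enabled "amd" model in the config, sweeping tensor-parallel size, max-num-seqs, paired input/output lengths, and request count, then logs each run and appends the parsed throughput to Outputs/LLMBenchmark_<machine>.csv.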

    def run_benchmark(self):
        for model_name, model_cfg in self.config['models'].items():
            # Only benchmark models that are enabled and tagged as AMD builds.
            if model_cfg['use_model'] and model_cfg['type'] == "amd":
                # Sweep every combination of tensor-parallel size, max batch size,
                # paired input/output length, and request count from the config.
                for tp_size in model_cfg['tp_sizes']:
                    for max_num_seq in model_cfg['max_num_seqs']:
                        for input_size, output_size in zip(model_cfg['input_length'], model_cfg['output_length']):
                            for request in model_cfg['num_requests']:
                                print(f"Benchmarking {model_name} | TP Size: {tp_size} | Input Size: {input_size} | Output Size: {output_size}")
                                run_benchmark_command = f'''
                                    /bin/bash -c \
                                    "python /app/vllm/benchmarks/benchmark_throughput.py \
                                        --model amd/{model_name} \
                                        --quantization fp8 \
                                        --kv-cache-dtype fp8 \
                                        --dtype half \
                                        --gpu-memory-utilization 0.90 \
                                        --distributed-executor-backend mp \
                                        --num-scheduler-steps 10 \
                                        --tensor-parallel-size {tp_size} \
                                        --enable-chunked-prefill false \
                                        --max-seq-len-to-capture 131072 \
                                        --max-num-batched-tokens 131072 \
                                        --max-model-len 8192 \
                                        --max-num-seqs {max_num_seq} \
                                        --num-prompts {request} \
                                        --input-len {input_size} \
                                        --output-len {output_size}"
                                    '''

                                # Run the benchmark inside the container and keep the full
                                # output for the log and for throughput parsing below.
                                rb1 = self.container.exec_run(run_benchmark_command)
                                output = rb1.output.decode('utf-8')

                                tools.write_log(output)

                                temp = output.split('\n')
                                # The benchmark prints a "Throughput: ..." summary line; pull
                                # the tokens/sec figure out of it, print a small results table,
                                # and append the row to the per-machine CSV.
                                for line in temp:
                                    if "Throughput: " in line:
                                        result = line.split(' ')[6]
                                        table1 = PrettyTable()
                                        table1.add_row(['Model Name', model_name])
                                        table1.add_row(['Input/Output lengths', f"{input_size}/{output_size}"])
                                        table1.add_row(['World Size (TP size)', str(tp_size)])
                                        table1.add_row(['Throughput (tokens/sec)', str(result)])

                                        print(table1.get_string(header=False))
                                        self.save_data(
                                            [model_name, str(input_size), str(output_size), str(tp_size), str(result)],
                                            f'Outputs/LLMBenchmark_{self.machine}.csv'
                                        )

        # Tear down the benchmark container once all sweeps have completed.
        self.container.kill()
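
For reference, the loops above assume a config dictionary shaped roughly like the sketch below. This is a minimal illustration only: the model name is hypothetical (it gets prefixed with "amd/" when passed to --model), the values are placeholders, and input_length/output_length are paired index-by-index.

    config = {
        'models': {
            'Llama-3.1-70B-Instruct-FP8-KV': {   # hypothetical model name
                'use_model': True,               # model is skipped entirely when False
                'type': 'amd',                   # only "amd" entries are benchmarked here
                'tp_sizes': [4, 8],              # --tensor-parallel-size values to sweep
                'max_num_seqs': [256],           # --max-num-seqs values to sweep
                'input_length': [128, 2048],     # paired with output_length by index
                'output_length': [128, 2048],
                'num_requests': [1000],          # --num-prompts values to sweep
            },
        },
    }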