def prepare_datasets()

in Benchmarks/NVIDIA/LLMBenchmark.py [0:0]


    def prepare_datasets(self):
        """Generate synthetic datasets and build missing engines for enabled models.

        For every entry in ``self.config['models']`` whose ``use_model`` flag is
        truthy:

        1. For each ``(input_sizes[i], output_sizes[i])`` pair, run
           TensorRT-LLM's ``prepare_dataset.py`` and redirect its output to
           ``<dir_path>/datasets/<name>_synthetic_<isl>_<osl>.txt``.
        2. If ``<dir_path>/engines/<model_name>`` does not exist, run
           ``trtllm-bench ... build`` using the dataset whose ``isl + osl`` is
           the largest among the configured pairs.

        The ``CompletedProcess`` of every command is handed to
        ``tools.check_error`` and the result to ``tools.write_log``.

        Raises:
            KeyError: if a model entry lacks one of the config keys read here.
            IndexError: if ``output_sizes`` is shorter than ``input_sizes``.
        """
        # Local import: quoting keeps paths/model names containing spaces or
        # shell metacharacters from breaking the shell command lines below.
        import shlex

        for model_name in self.config['models']:
            model_cfg = self.config['models'][model_name]
            if not model_cfg['use_model']:
                continue

            # Dataset files are named after the part following the first '/'
            # (e.g. "org/model" -> "model"); names without a '/' are used as-is
            # instead of raising IndexError.
            if '/' in model_name:
                short_name = model_name.split('/')[1]
            else:
                short_name = model_name

            script_path = f"{self.dir_path}/TensorRT-LLM/benchmarks/cpp/prepare_dataset.py"
            max_sum = 0
            # NOTE(review): stays "" when input_sizes is empty (or every
            # isl + osl is <= 0), in which case the build command below gets
            # an empty --dataset value - confirm whether that can occur.
            max_dataset_path = ""

            for i, isl in enumerate(model_cfg['input_sizes']):
                # Indexed (not zipped) so a too-short output_sizes still fails
                # loudly rather than silently dropping pairs.
                osl = model_cfg['output_sizes'][i]
                dataset_path = (
                    self.dir_path + "/datasets/" + short_name
                    + "_synthetic_" + str(isl) + "_" + str(osl) + ".txt"
                )

                # Remember the dataset with the largest combined length; it is
                # the one passed to the engine build.
                if isl + osl > max_sum:
                    max_sum = isl + osl
                    max_dataset_path = dataset_path

                # The script is run with --stdout and the shell redirects that
                # output into dataset_path.
                prepare_dataset_command = ' '.join([
                    'python3', shlex.quote(script_path),
                    '--stdout',
                    '--tokenizer', shlex.quote(model_name),
                    'token-norm-dist',
                    '--num-requests', shlex.quote(str(model_cfg['num_requests'])),
                    '--input-mean', shlex.quote(str(isl)),
                    '--output-mean', shlex.quote(str(osl)),
                    '--input-stdev=0',
                    '--output-stdev=0',
                    '>', shlex.quote(dataset_path),
                ])

                result = subprocess.run(
                    prepare_dataset_command,
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                tools.write_log(tools.check_error(result))

            # Only build when no engine directory exists for this model yet.
            if not os.path.exists(self.dir_path + "/engines/" + model_name):
                print("Building engine for ", model_name)
                build_engine_command = ' '.join([
                    'trtllm-bench',
                    '--workspace', shlex.quote(self.dir_path + "/engines"),
                    '--model', shlex.quote(model_name),
                    'build',
                    '--tp_size', shlex.quote(str(model_cfg['tp_size'])),
                    '--dataset', shlex.quote(max_dataset_path),
                    '--quantization', shlex.quote(str(model_cfg['precision'])),
                ])

                result = subprocess.run(
                    build_engine_command,
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                tools.write_log(tools.check_error(result))