in Benchmarks/NVIDIA/LLMBenchmark.py [0:0]
def prepare_datasets(self):
    # Assumes module-level imports elsewhere in this file: os, subprocess, and the project's tools helper.
    for model_name in self.config['models']:
        model_cfg = self.config['models'][model_name]
        if not model_cfg['use_model']:
            continue

        max_isl = 0
        max_osl = 0
        max_sum = 0
        max_dataset_path = ""
        name = model_name.split('/')[1]

        # Generate one synthetic dataset per (input length, output length) pair.
        for i in range(len(model_cfg['input_sizes'])):
            isl = model_cfg['input_sizes'][i]
            osl = model_cfg['output_sizes'][i]

            # Track the largest ISL + OSL combination; its dataset is reused for the engine build below.
            if isl + osl > max_sum:
                max_sum = isl + osl
                max_isl = isl
                max_osl = osl
                max_dataset_path = f"{self.dir_path}/datasets/{name}_synthetic_{max_isl}_{max_osl}.txt"

            dataset_path = f"{self.dir_path}/datasets/{name}_synthetic_{isl}_{osl}.txt"
            prepare_dataset_command = f'''
            python3 {self.dir_path}/TensorRT-LLM/benchmarks/cpp/prepare_dataset.py \
                --stdout \
                --tokenizer {model_name} \
                token-norm-dist \
                --num-requests {model_cfg['num_requests']} \
                --input-mean {isl} \
                --output-mean {osl} \
                --input-stdev=0 \
                --output-stdev=0 > {dataset_path}
            '''
            result = subprocess.run(prepare_dataset_command, shell=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tools.write_log(tools.check_error(result))

        # Build the TensorRT-LLM engine once per model, using the dataset with the largest sequence lengths.
        if not os.path.exists(f"{self.dir_path}/engines/{model_name}"):
            print("Building engine for", model_name)
            build_engine_command = f'''
            trtllm-bench \
                --workspace {self.dir_path}/engines \
                --model {model_name} build \
                --tp_size {model_cfg['tp_size']} \
                --dataset {max_dataset_path} \
                --quantization {model_cfg['precision']}
            '''
            result = subprocess.run(build_engine_command, shell=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            tools.write_log(tools.check_error(result))
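
# Illustrative only: a minimal sketch of the config shape prepare_datasets() appears to expect,
# inferred from the keys it reads above. The model id and concrete values are assumptions
# chosen for demonstration, not values taken from the project's actual config.
EXAMPLE_CONFIG = {
    'models': {
        'meta-llama/Llama-3.1-8B': {      # hypothetical HF-style "org/model" id; split('/')[1] relies on this format
            'use_model': True,            # dataset and engine prep are skipped when False
            'input_sizes': [128, 2048],   # paired element-wise with output_sizes
            'output_sizes': [128, 2048],
            'num_requests': 1000,         # requests per synthetic dataset
            'tp_size': 1,                 # tensor-parallel degree passed to trtllm-bench
            'precision': 'FP8',           # forwarded to trtllm-bench --quantization
        }
    }
}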