in tensorrtllm/run_eval.py [0:0]
def process_single_batch(self, mel_batch, decoder_input_ids, mel_input_lengths, max_new_tokens):
    # Run encoder-decoder generation with TensorRT-LLM's ModelRunnerCpp.
    # The Whisper encoder downsamples the mel sequence by a factor of 2,
    # hence encoder_output_lengths = mel_input_lengths // 2.
    outputs = self.model_runner_cpp.generate(
        batch_input_ids=decoder_input_ids,
        encoder_input_features=mel_batch,
        encoder_output_lengths=mel_input_lengths // 2,
        max_new_tokens=max_new_tokens,
        end_id=self.eot_id,
        pad_id=self.eot_id,
        num_beams=1,
        output_sequence_lengths=True,
        return_dict=True,
    )
    # output_ids has shape (batch, num_beams, seq_len); greedy decoding, so take beam 0.
    output_ids = outputs['output_ids'].cpu().numpy().tolist()
    texts = []
    for i in range(len(output_ids)):
        text = self.tokenizer.decode(output_ids[i][0]).strip()
        # Strip Whisper special tokens such as <|startoftranscript|> or <|en|>.
        text = re.sub(r'<\|.*?\|>', '', text)
        texts.append(text)
    return texts
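
For context, a minimal usage sketch of how this method might be invoked is below. The `evaluator` instance, the hard-coded prompt token ids, and the mel feature shape and dtype are illustrative assumptions, not code taken from run_eval.py; in the script itself the prompt ids come from the Whisper tokenizer and the mel features from its own preprocessing.

# Usage sketch (assumptions, not part of run_eval.py):
#   - 'evaluator' is an assumed instance of the class that defines process_single_batch
#   - mel_batch shape (batch, 80 mel bins, 3000 frames) and float16 dtype are assumed
#   - the prompt ids below stand in for the tokenizer's real special-token ids for
#     <|startoftranscript|><|en|><|transcribe|><|notimestamps|>
import torch

batch_size = 2
mel_batch = torch.zeros(batch_size, 80, 3000, dtype=torch.float16, device="cuda")
mel_input_lengths = torch.full((batch_size,), mel_batch.shape[-1], dtype=torch.int32)

prompt_ids = torch.tensor([[50258, 50259, 50359, 50363]], dtype=torch.int32)
decoder_input_ids = prompt_ids.repeat(batch_size, 1)

texts = evaluator.process_single_batch(
    mel_batch, decoder_input_ids, mel_input_lengths, max_new_tokens=96
)
print(texts)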