def process_single_batch()

in tensorrtllm/run_eval.py [0:0]


    def process_single_batch(self, mel_batch, decoder_input_ids, mel_input_lengths, max_new_tokens):
        # Run encoder-decoder generation with the TensorRT-LLM C++ runner:
        # the mel spectrogram batch feeds the encoder, while decoder_input_ids
        # holds the Whisper prompt tokens that prime the decoder. The encoder
        # halves the time dimension (stride-2 conv), so its output lengths are
        # the mel input lengths divided by 2.
        outputs = self.model_runner_cpp.generate(
            batch_input_ids=decoder_input_ids,
            encoder_input_features=mel_batch,
            encoder_output_lengths=mel_input_lengths // 2,
            max_new_tokens=max_new_tokens,
            end_id=self.eot_id,           # end-of-text token stops generation
            pad_id=self.eot_id,           # and also pads shorter sequences
            num_beams=1,                  # greedy decoding
            output_sequence_lengths=True,
            return_dict=True
        )
        
        # output_ids is shaped (batch, num_beams, seq_len); with num_beams=1 we
        # decode the single beam per sample and strip Whisper special tokens
        # such as <|startoftranscript|> and <|notimestamps|>.
        output_ids = outputs['output_ids'].cpu().numpy().tolist()
        texts = []
        for i in range(len(output_ids)):
            text = self.tokenizer.decode(output_ids[i][0]).strip()
            text = re.sub(r'<\|.*?\|>', '', text)  # drop <|...|> special tokens
            texts.append(text)
        return texts
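
For context, a minimal call-site sketch is shown below. The runner instance, tensor shapes, dtypes, and prompt construction are assumptions for illustration only and are not taken from run_eval.py.

    # Hypothetical usage: `runner` is an instance of the evaluation class that
    # defines process_single_batch, and `prompt_ids` holds the Whisper decoder
    # prompt (<|startoftranscript|><|en|><|transcribe|><|notimestamps|>)
    # encoded with the model's tokenizer.
    import torch

    batch_size = 4
    # 80 mel bins (128 for large-v3) and 3000 frames = 30 s of audio;
    # dtype and device depend on how the TensorRT-LLM engine was built.
    mel_batch = torch.randn(batch_size, 80, 3000, dtype=torch.float16).cuda()
    mel_input_lengths = torch.full((batch_size,), 3000, dtype=torch.int32)
    decoder_input_ids = [prompt_ids.clone() for _ in range(batch_size)]

    texts = runner.process_single_batch(
        mel_batch, decoder_input_ids, mel_input_lengths, max_new_tokens=96
    )
    for text in texts:
        print(text)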