def process()

in TTS/facebookmms_handler.py [0:0]


    def process(self, llm_sentence):
        """Synthesize speech for a sentence and yield it as fixed-size int16 chunks.

        Args:
            llm_sentence: Either the text to speak, or a ``(text, language_code)``
                tuple. When a language code is given and differs from
                ``self.language``, ``self.load_model`` is called with it before
                synthesis; a ``KeyError`` from that call is reported and the
                currently loaded language is kept.

        Yields:
            ``np.int16`` arrays of ``self.chunk_size`` samples, resampled to
            16 kHz (assumes the squeezed waveform is 1-D). The final chunk is
            zero-padded up to ``self.chunk_size``.

        Note:
            ``self.should_listen`` is set once every chunk has been yielded, or
            immediately when no audio was generated.
        """
        language_code = None

        if isinstance(llm_sentence, tuple):
            llm_sentence, language_code = llm_sentence

        console.print(f"[green]ASSISTANT: {llm_sentence}")
        logger.debug(f"Processing text: {llm_sentence}")
        logger.debug(f"Language code: {language_code}")

        if language_code is not None and self.language != language_code:
            try:
                logger.info(f"Switching language from {self.language} to {language_code}")
                self.load_model(language_code)
            except KeyError:
                # Best effort: keep speaking with the model that is already loaded.
                console.print(f"[red]Language {language_code} not supported by Facebook MMS. Using {self.language} instead.")
                logger.warning(f"Unsupported language: {language_code}")

        audio_output = self.generate_audio(llm_sentence)

        if audio_output is None or audio_output.numel() == 0:
            logger.warning("No audio output generated")
            self.should_listen.set()
            return

        audio_numpy = audio_output.cpu().numpy().squeeze()
        logger.debug(f"Raw audio shape: {audio_numpy.shape}, dtype: {audio_numpy.dtype}")

        audio_resampled = librosa.resample(
            audio_numpy,
            orig_sr=self.model.config.sampling_rate,
            target_sr=16000,
        )
        logger.debug(f"Resampled audio shape: {audio_resampled.shape}, dtype: {audio_resampled.dtype}")

        # Clip before the cast: a sample at (or overshooting) +1.0 scales to
        # >= 32768, which does not fit in int16 and would otherwise wrap around
        # to a large negative value, producing an audible click.
        scaled = np.clip(audio_resampled * 32768, -32768, 32767)
        audio_int16 = scaled.astype(np.int16)
        logger.debug(f"Final audio shape: {audio_int16.shape}, dtype: {audio_int16.dtype}")

        # The streaming and non-streaming paths emitted identical chunks, so a
        # single loop serves both modes.
        chunk_size = self.chunk_size
        for start in range(0, len(audio_int16), chunk_size):
            chunk = audio_int16[start:start + chunk_size]
            # Zero-pad the (possibly short) last chunk to a full chunk_size.
            yield np.pad(chunk, (0, chunk_size - len(chunk)))

        self.should_listen.set()