in parler_tts/configuration_parler_tts.py [0:0]
def __init__(self, vocab_size=1024, prompt_cross_attention=False, **kwargs):
super().__init__(**kwargs)
if "text_encoder" not in kwargs or "audio_encoder" not in kwargs or "decoder" not in kwargs:
raise ValueError("Config has to be initialized with text_encoder, audio_encoder and decoder config")
text_encoder_config = kwargs.pop("text_encoder")
text_encoder_model_type = text_encoder_config.pop("model_type")
audio_encoder_config = kwargs.pop("audio_encoder")
audio_encoder_model_type = audio_encoder_config.pop("model_type")
model_version = kwargs.get("transformers_version", None)
if model_version is not None and Version(model_version) <= Version("4.44.2dev") and use_dac_on_the_hub and audio_encoder_model_type=="dac":
# here we have to manually change model type if DAC based on transformers version
audio_encoder_model_type = "dac_on_the_hub"
decoder_config = kwargs.pop("decoder")
self.vocab_size = vocab_size
self.prompt_cross_attention = prompt_cross_attention
self.text_encoder = AutoConfig.for_model(text_encoder_model_type, **text_encoder_config)
self.audio_encoder = AutoConfig.for_model(audio_encoder_model_type, **audio_encoder_config)
self.decoder = ParlerTTSDecoderConfig(**decoder_config)
self.is_encoder_decoder = True