def __init__()

in models/encoders.py [0:0]


    def __init__(self, latent_dim: int = 128, model_name: str = 'audio_encoder'):
        """
        :param latent_dim: size of the latent audio embedding
        :param model_name: name of the model, used to load and save the model
        """
        super().__init__(model_name)

        self.melspec = ta.transforms.MelSpectrogram(
            sample_rate=16000, n_fft=2048, win_length=800, hop_length=160, n_mels=80
        )

        conv_len = 5
        self.convert_dimensions = th.nn.Conv1d(80, 128, kernel_size=conv_len)
        self.weights_init(self.convert_dimensions)
        self.receptive_field = conv_len

        convs = []
        for i in range(6):
            dilation = 2 * (i % 3 + 1)
            self.receptive_field += (conv_len - 1) * dilation
            convs += [th.nn.Conv1d(128, 128, kernel_size=conv_len, dilation=dilation)]
            self.weights_init(convs[-1])
        self.convs = th.nn.ModuleList(convs)
        self.code = th.nn.Linear(128, latent_dim)

        self.apply(lambda x: self.weights_init(x))