in models/encoders.py [0:0]
def __init__(self, classes: int = 128, heads: int = 64, expression_dim: int = 128, audio_dim: int = 128,
             model_name: str = 'fusion_model'):
    """
    Set up a three-layer MLP whose input width is ``expression_dim + audio_dim`` and whose
    output width is ``heads * classes``.

    :param classes: number of classes for the categorical latent embedding
    :param heads: number of heads for the categorical latent embedding
    :param expression_dim: size of the latent expression embedding before quantization through Gumbel softmax
    :param audio_dim: size of the latent audio embedding
    :param model_name: name of the model, forwarded to the base-class constructor
                       (used there to load and save the model)
    """
    super().__init__(model_name)
    self.classes, self.heads = classes, heads

    # Layer widths. The input width is the sum of both embedding sizes
    # (presumably the caller concatenates them -- confirm against forward()).
    hidden_width = 256
    in_features = expression_dim + audio_dim
    out_features = heads * classes  # one output value per (head, class) pair

    nn = th.nn
    # Layers are created in network order so parameter initialisation draws from the RNG
    # in the same sequence, and the Sequential indices (0..4) stay stable for checkpoints.
    layers = [
        nn.Linear(in_features, hidden_width),
        nn.LeakyReLU(negative_slope=0.2, inplace=True),
        nn.Linear(hidden_width, hidden_width),
        nn.LeakyReLU(negative_slope=0.2, inplace=True),
        nn.Linear(hidden_width, out_features),
    ]
    self.mlp = nn.Sequential(*layers)