in models/encoders.py [0:0]
def __init__(self,
             classes: int = 128,
             heads: int = 64,
             expression_dim: int = 128,
             audio_dim: int = 128,
             n_vertices: int = 6172,
             mean: "th.Tensor | None" = None,
             stddev: "th.Tensor | None" = None,
             model_name: str = "encoder"
             ):
    """
    Build the combined encoder: an audio branch, an expression branch, and
    a fusion MLP that merges both latents into a categorical embedding.

    :param classes: number of classes for the categorical latent embedding
    :param heads: number of heads for the categorical latent embedding
    :param expression_dim: size of the latent expression embedding before quantization through Gumbel softmax
    :param audio_dim: size of the latent audio embedding
    :param n_vertices: number of vertices in the face mesh
    :param mean: mean position of each vertex, or None (presumably: normalization
        is then left to ExpressionEncoder's default — confirm against its definition)
    :param stddev: standard deviation of each vertex position, or None (same caveat as mean)
    :param model_name: name of the model, used to load and save the model
    """
    # NOTE(review): mean/stddev were annotated plain `th.Tensor` despite a None
    # default (implicit Optional, disallowed by PEP 484). String annotations are
    # used so the fix does not require a minimum Python/torch version.
    super().__init__(model_name)  # parent handles persistence under model_name
    self.audio_encoder = AudioEncoder(audio_dim)
    self.expression_encoder = ExpressionEncoder(expression_dim, n_vertices, mean, stddev)
    self.fusion_model = FusionMlp(classes, heads, expression_dim, audio_dim)