in mmf/models/mmf_transformer.py [0:0]
def build_layers(self):
    for modality in self.model_config.modalities:
        layer_norm_eps = getattr(
            modality, "layer_norm_eps", self.transformer_config.layer_norm_eps
        )
        if modality.type == "text":
            # Token embedding lookup for text modalities
            setattr(
                self,
                modality.key + "_embedding",
                nn.Embedding(
                    self.transformer_config.vocab_size,
                    self.transformer_config.hidden_size,
                    padding_idx=self.transformer_config.pad_token_id,
                ),
            )
        elif modality.type == "image":
            # Project image features to the transformer hidden size, then normalize
            setattr(
                self,
                modality.key + "_embedding",
                nn.Sequential(
                    nn.Linear(
                        modality.embedding_dim, self.transformer_config.hidden_size
                    ),
                    torch.nn.LayerNorm(
                        self.transformer_config.hidden_size, eps=layer_norm_eps
                    ),
                ),
            )

        # Set the position embeddings
        position_dim = getattr(
            modality,
            "position_dim",
            self.transformer_config.max_position_embeddings,
        )
        setattr(
            self,
            modality.key + "_pos_embedding",
            nn.Embedding(position_dim, self.transformer_config.hidden_size),
        )

        # Layer norm
        setattr(
            self,
            modality.key + "_layer_norm",
            torch.nn.LayerNorm(
                self.transformer_config.hidden_size, eps=layer_norm_eps
            ),
        )

        # Dropout
        hidden_dropout_prob = getattr(
            modality,
            "hidden_dropout_prob",
            self.transformer_config.hidden_dropout_prob,
        )
        setattr(self, modality.key + "_dropout", nn.Dropout(hidden_dropout_prob))

    # One token type (segment) embedding per modality, shared across modalities
    self.token_type_embeddings = nn.Embedding(
        len(self.model_config.modalities), self.transformer_config.hidden_size
    )
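
# --- Illustrative usage sketch (not part of the source file) ---
# Because build_layers() registers the per-modality layers dynamically with
# setattr, a forward pass has to retrieve them with getattr using the same
# "<key>_embedding" / "<key>_pos_embedding" / "<key>_layer_norm" / "<key>_dropout"
# naming convention. The helper below is a hypothetical sketch of that lookup;
# the name `_embed_modality` and its signature are assumptions, not MMF API.
def _embed_modality(self, modality_key, inputs, position_ids, token_type_ids):
    # Look up the layers registered for this modality in build_layers()
    embedding = getattr(self, modality_key + "_embedding")
    pos_embedding = getattr(self, modality_key + "_pos_embedding")
    layer_norm = getattr(self, modality_key + "_layer_norm")
    dropout = getattr(self, modality_key + "_dropout")

    # BERT-style embedding sum: token/feature embedding + position embedding
    # + token type (segment) embedding, followed by layer norm and dropout.
    embeddings = (
        embedding(inputs)
        + pos_embedding(position_ids)
        + self.token_type_embeddings(token_type_ids)
    )
    return dropout(layer_norm(embeddings))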