in src/model.py [0:0]
def __init__(self, dimension_params, metadata_constructor_params, layer_params):
    super().__init__()
    # Model dimensions.
    self.emb_dim = dimension_params["emb_dim"]
    self.context_dim = dimension_params["context_dim"]
    self.hidden_dim = dimension_params["hidden_dim"]
    self.vocab_size = dimension_params["vocab_size"]
    # Architecture options.
    self.n_layers = layer_params["n_layers"]
    self.use_softmax_adaptation = layer_params["use_softmax_adaptation"]
    self.use_layernorm = layer_params["use_layernorm"]
    self.use_weight_tying = layer_params["use_weight_tying"]

    self.metadata_constructor = MetadataConstructor(metadata_constructor_params,
                                                    dimension_params)
    self.embeddings = nn.Embedding(self.vocab_size, self.emb_dim)

    # Gate weights are allocated manually (LSTM-style, 4 gates per layer) so that
    # each layer can also consume the metadata/context vector.
    self.gate_size = 4 * self.hidden_dim  # input, forget, cell (g), and output gates
    self._all_weights = nn.ParameterList()
    self._params_per_layer = 5
    for layer in range(self.n_layers):
        # The first layer reads embeddings; deeper layers read the previous layer's hidden state.
        self.layer_input_size = self.emb_dim if layer == 0 else self.hidden_dim
        # Per-layer weights: input-to-hidden (w_ih), hidden-to-hidden (w_hh),
        # an additional metadata/context-to-hidden matrix (w_mh), and the two biases.
        w_ih = nn.Parameter(torch.Tensor(self.gate_size, self.layer_input_size))
        w_hh = nn.Parameter(torch.Tensor(self.gate_size, self.hidden_dim))
        w_mh = nn.Parameter(torch.Tensor(self.gate_size, self.context_dim))
        b_ih = nn.Parameter(torch.Tensor(self.gate_size))
        b_hh = nn.Parameter(torch.Tensor(self.gate_size))
        # Register the five per-layer parameters in a fixed order (cf. self._params_per_layer).
        for param in (w_mh, w_ih, w_hh, b_ih, b_hh):
            self._all_weights.append(param)
    if self.use_softmax_adaptation:
        # Optional softmax adaptation: projects the metadata/context vector
        # to vocabulary-sized logits so it can adapt the output distribution.
        self.md_vocab_projection = nn.Linear(self.context_dim, self.vocab_size)
    if self.use_layernorm:
        self.layernorm = nn.LayerNorm(self.hidden_dim)
    if self.use_weight_tying:
        # Tie the output projection to the input embeddings; an extra
        # hidden_dim -> emb_dim projection bridges the dimension mismatch.
        self.vocab_projection = nn.Linear(self.emb_dim, self.vocab_size)
        self.embedding_projection = nn.Linear(self.hidden_dim, self.emb_dim)
        self.vocab_projection.weight = self.embeddings.weight
    else:
        self.vocab_projection = nn.Linear(self.hidden_dim, self.vocab_size)

    self._reset_parameters()
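
Construction sketch (hypothetical): the enclosing class name is not shown in this excerpt, so MetadataLSTM below is an assumed name and the concrete sizes are placeholders; only the dictionary keys are taken from the constructor above.

# Hypothetical usage sketch -- the class name MetadataLSTM and the sizes are assumptions.
dimension_params = {
    "emb_dim": 128,
    "context_dim": 64,
    "hidden_dim": 256,
    "vocab_size": 10000,
}
layer_params = {
    "n_layers": 2,
    "use_softmax_adaptation": True,
    "use_layernorm": True,
    "use_weight_tying": False,
}
metadata_constructor_params = {}  # fill with whatever MetadataConstructor expects (not shown in this excerpt)
model = MetadataLSTM(dimension_params, metadata_constructor_params, layer_params)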