in NMT/src/model/attention.py [0:0]
def __init__(self, params, encoder):
"""
Decoder initialization.
"""
super(Decoder, self).__init__()
# model parameters
self.n_langs = params.n_langs
self.n_words = params.n_words
self.share_lang_emb = params.share_lang_emb
self.share_encdec_emb = params.share_encdec_emb
self.share_decpro_emb = params.share_decpro_emb
self.share_output_emb = params.share_output_emb
self.share_lstm_proj = params.share_lstm_proj
self.share_att_proj = params.share_att_proj
self.share_dec = params.share_dec
self.emb_dim = params.emb_dim
self.hidden_dim = params.hidden_dim
self.lstm_proj = params.lstm_proj
self.dropout = params.dropout
self.n_dec_layers = params.n_dec_layers
self.input_feeding = params.input_feeding
self.freeze_dec_emb = params.freeze_dec_emb
assert not self.share_lang_emb or len(set(params.n_words)) == 1
assert not self.share_decpro_emb or self.lstm_proj or self.emb_dim == self.hidden_dim
assert 0 <= self.share_dec <= self.n_dec_layers
assert self.n_dec_layers > 1 or self.n_dec_layers == 1 and self.input_feeding
# indexes
self.eos_index = params.eos_index
self.pad_index = params.pad_index
self.bos_index = params.bos_index
# words allowed for generation
self.vocab_mask_neg = params.vocab_mask_neg if len(params.vocab) > 0 else None
# embedding layers
if self.share_encdec_emb:
logger.info("Sharing encoder and decoder input embeddings")
embeddings = encoder.embeddings
else:
if self.share_lang_emb:
logger.info("Sharing decoder input embeddings")
layer_0 = nn.Embedding(self.n_words[0], self.emb_dim, padding_idx=self.pad_index)
nn.init.normal_(layer_0.weight, 0, 0.1)
nn.init.constant_(layer_0.weight[self.pad_index], 0)
embeddings = [layer_0 for _ in range(self.n_langs)]
else:
embeddings = []
for n_words in self.n_words:
layer_i = nn.Embedding(n_words, self.emb_dim, padding_idx=self.pad_index)
nn.init.normal_(layer_i.weight, 0, 0.1)
nn.init.constant_(layer_i.weight[self.pad_index], 0)
embeddings.append(layer_i)
embeddings = nn.ModuleList(embeddings)
self.embeddings = embeddings
# LSTM layers / shared layers
self.lstm1_input_size = self.emb_dim + (self.emb_dim if self.input_feeding else 0)
self.lstm2_input_size = self.hidden_dim + (0 if self.input_feeding else self.emb_dim)
lstm1 = [
nn.LSTM(self.lstm1_input_size, self.hidden_dim, num_layers=1, dropout=self.dropout, bias=True)
for _ in range(self.n_langs)
]
if self.n_dec_layers > 1:
lstm2 = [
nn.LSTM(self.lstm2_input_size, self.hidden_dim, num_layers=self.n_dec_layers - 1, dropout=self.dropout, bias=True)
for _ in range(self.n_langs)
]
else:
lstm2 = None
for k in range(self.n_dec_layers):
if k + 1 <= self.share_dec:
logger.info("Sharing decoder LSTM parameters for layer %i" % k)
for i in range(1, self.n_langs):
for name in LSTM_PARAMS:
if k == 0:
setattr(lstm1[i], name % k, getattr(lstm1[0], name % k))
else:
setattr(lstm2[i], name % (k - 1), getattr(lstm2[0], name % (k - 1)))
self.lstm1 = nn.ModuleList(lstm1)
self.lstm2 = nn.ModuleList(lstm2)
# attention layers
if self.share_att_proj:
logger.info("Sharing decoder attention projection layers")
att_proj_0 = nn.Linear(self.hidden_dim, self.emb_dim)
att_proj = [att_proj_0 for _ in range(self.n_langs)]
else:
att_proj = [nn.Linear(self.hidden_dim, self.emb_dim) for _ in range(self.n_langs)]
self.att_proj = nn.ModuleList(att_proj)
# projection layers between LSTM and output embeddings
if self.lstm_proj:
lstm_proj_layers = [nn.Linear(self.hidden_dim, self.emb_dim) for _ in range(self.n_langs)]
if self.share_lstm_proj:
logger.info("Sharing decoder post-LSTM projection layers")
for i in range(1, self.n_langs):
lstm_proj_layers[i].weight = lstm_proj_layers[0].weight
lstm_proj_layers[i].bias = lstm_proj_layers[0].bias
self.lstm_proj_layers = nn.ModuleList(lstm_proj_layers)
proj_output_dim = self.emb_dim
else:
self.lstm_proj_layers = [None for _ in range(self.n_langs)]
proj_output_dim = self.hidden_dim
# projection layers
proj = [nn.Linear(proj_output_dim, n_words) for n_words in self.n_words]
if self.share_decpro_emb:
logger.info("Sharing input embeddings and projection matrix in the decoder")
for i in range(self.n_langs):
proj[i].weight = self.embeddings[i].weight
if self.share_lang_emb:
assert self.share_output_emb
logger.info("Sharing decoder projection matrices")
for i in range(1, self.n_langs):
proj[i].bias = proj[0].bias
elif self.share_output_emb:
assert self.share_lang_emb
logger.info("Sharing decoder projection matrices")
for i in range(1, self.n_langs):
proj[i].weight = proj[0].weight
proj[i].bias = proj[0].bias
self.proj = nn.ModuleList(proj)
self.log_sm = torch.nn.LogSoftmax()