in code/src/model/attention.py
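# Module-level imports assumed by this excerpt (not shown here):
# torch.nn as nn, functools.reduce, operator.mul, and a module-level logger.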
def __init__(self, params, encoder):
"""
Decoder initialization.
"""
super(Decoder, self).__init__()
# model parameters
self.attributes = params.attributes
self.attr_values = params.attr_values
self.n_words = params.n_words
self.share_encdec_emb = params.share_encdec_emb
self.share_decpro_emb = params.share_decpro_emb
self.emb_dim = params.emb_dim
self.hidden_dim = params.hidden_dim
self.lstm_proj = params.lstm_proj
self.dropout = params.dropout
self.n_dec_layers = params.n_dec_layers
self.input_feeding = params.input_feeding
self.freeze_dec_emb = params.freeze_dec_emb
self.bos_attr = params.bos_attr
self.bias_attr = params.bias_attr
assert not self.share_decpro_emb or self.lstm_proj or self.emb_dim == self.hidden_dim
assert self.n_dec_layers > 1 or (self.n_dec_layers == 1 and self.input_feeding)
assert self.bos_attr in ['', 'avg', 'cross']
assert self.bias_attr in ['', 'avg', 'cross']
# indexes
self.bos_index = params.bos_index
self.eos_index = params.eos_index
self.pad_index = params.pad_index
# attribute embeddings / bias
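# bos_attr: attribute information injected as a learned start-of-sequence embedding (emb_dim).
# bias_attr: attribute-specific bias added over the output vocabulary (n_words).
# 'avg' learns one embedding per attribute value (sum of n_labels);
# 'cross' learns one per combination of attribute values (product of n_labels).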
if self.bos_attr != '' or self.bias_attr != '':
self.register_buffer('attr_offset', params.attr_offset.clone())
self.register_buffer('attr_shifts', params.attr_shifts.clone())
if self.bos_attr != '':
n_bos_attr = sum(params.n_labels) if self.bos_attr == 'avg' else reduce(mul, params.n_labels, 1)
self.bos_attr_embeddings = nn.Embedding(n_bos_attr, self.emb_dim)
if self.bias_attr != '':
n_bias_attr = sum(params.n_labels) if self.bias_attr == 'avg' else reduce(mul, params.n_labels, 1)
self.bias_attr_embeddings = nn.Embedding(n_bias_attr, self.n_words)
# embedding layers
if self.share_encdec_emb:
logger.info("Sharing encoder and decoder input embeddings")
self.embeddings = encoder.embeddings
else:
self.embeddings = nn.Embedding(self.n_words, self.emb_dim, padding_idx=self.pad_index)
nn.init.normal_(self.embeddings.weight, 0, 0.1)
nn.init.constant_(self.embeddings.weight[self.pad_index], 0)
# LSTM layers
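# With input feeding, the emb_dim-sized attention output is concatenated to the
# word embedding at the first layer's input (isize1); otherwise it is concatenated
# to the input of the upper layers (isize2).
# Note: nn.LSTM only applies dropout between stacked layers, so it has no effect on lstm1.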
isize1 = self.emb_dim + (self.emb_dim if self.input_feeding else 0)
isize2 = self.hidden_dim + (0 if self.input_feeding else self.emb_dim)
self.lstm1 = nn.LSTM(isize1, self.hidden_dim, num_layers=1, dropout=self.dropout, bias=True)
self.lstm2 = nn.LSTM(isize2, self.hidden_dim, num_layers=self.n_dec_layers - 1, dropout=self.dropout, bias=True) if self.n_dec_layers > 1 else None
# attention layers
self.att_proj = nn.Linear(self.hidden_dim, self.emb_dim)
# projection layers between LSTM and output embeddings
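# Optional projection from hidden_dim down to emb_dim, required to tie the output
# projection with the emb_dim-sized input embeddings when emb_dim != hidden_dim
# (see the share_decpro_emb assert above).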
if self.lstm_proj:
self.lstm_proj_layer = nn.Linear(self.hidden_dim, self.emb_dim)
proj_output_dim = self.emb_dim
else:
self.lstm_proj_layer = None
proj_output_dim = self.hidden_dim
# projection layers
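# Scores over the output vocabulary; weights optionally tied to the input embeddings.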
proj = nn.Linear(proj_output_dim, self.n_words)
if self.share_decpro_emb:
logger.info("Sharing input embeddings and projection matrix in the decoder")
proj.weight = self.embeddings.weight
self.proj = proj