in torchmoji/model_def.py [0:0]
def forward(self, input_seqs):
""" Forward pass.
# Arguments:
input_seqs: Can be one of Numpy array, Torch.LongTensor, Torch.Variable, Torch.PackedSequence.
# Return:
Same format as input format (except for PackedSequence returned as Variable).
"""
# Work out the input format (LongTensor, Numpy array, Variable or PackedSequence) and remember it so the output can be returned in the same format
return_numpy = False
return_tensor = False
if isinstance(input_seqs, (torch.LongTensor, torch.cuda.LongTensor)):
input_seqs = Variable(input_seqs)
return_tensor = True
elif not isinstance(input_seqs, (Variable, PackedSequence)):  # assume a Numpy array of token ids
input_seqs = Variable(torch.from_numpy(input_seqs.astype('int64')).long())
return_numpy = True
# If the input is not already a PackedSequence, pack it ourselves
reorder_output = False
if not isinstance(input_seqs, PackedSequence):
ho = self.lstm_0.weight_hh_l0.data.new(2, input_seqs.size()[0], self.hidden_size).zero_()
co = self.lstm_0.weight_hh_l0.data.new(2, input_seqs.size()[0], self.hidden_size).zero_()
# Reorder batch by sequence length
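# A sequence's length is taken as the index of its last non-zero token + 1 (token id 0 is treated as padding)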
input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
input_lengths, perm_idx = input_lengths.sort(0, descending=True)
input_seqs = input_seqs[perm_idx][:, :input_lengths.max()]
# Pack sequence and work on data tensor to reduce embeddings/dropout computations
packed_input = pack_padded_sequence(input_seqs, input_lengths.cpu().numpy(), batch_first=True)
reorder_output = True
else:
# A PackedSequence has no size(); its first batch_sizes entry is the batch size
ho = self.lstm_0.weight_hh_l0.data.new(2, input_seqs.batch_sizes[0], self.hidden_size).zero_()
co = self.lstm_0.weight_hh_l0.data.new(2, input_seqs.batch_sizes[0], self.hidden_size).zero_()
input_lengths = input_seqs.batch_sizes
packed_input = input_seqs
hidden = (Variable(ho, requires_grad=False), Variable(co, requires_grad=False))
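# Zero initial (h_0, c_0) states for one bidirectional layer (2 directions); the same zero state is fed to both LSTMs below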
# Embed with an activation function to bound the values of the embeddings
x = self.embed(packed_input.data)
x = nn.Tanh()(x)
# PyTorch's Dropout2d operates on axis 1 (here the embedding dimension), which is what we want
x = self.embed_dropout(x)
# Update packed sequence data for RNN
packed_input = PackedSequence(x, packed_input.batch_sizes)
# skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
# ordering of the way the merge is done is important for consistency with the pretrained model
lstm_0_output, _ = self.lstm_0(packed_input, hidden)
lstm_1_output, _ = self.lstm_1(lstm_0_output, hidden)
# Update packed sequence data for attention layer
packed_input = PackedSequence(torch.cat((lstm_1_output.data,
lstm_0_output.data,
packed_input.data), dim=1),
packed_input.batch_sizes)
input_seqs, _ = pad_packed_sequence(packed_input, batch_first=True)
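# Attention pools the padded (batch, time, features) tensor over timesteps into one feature vector per sequence; att_weights holds the per-timestep weights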
x, att_weights = self.attention_layer(input_seqs, input_lengths)
# output class probabilities or penultimate feature vector
if not self.feature_output:
x = self.final_dropout(x)
outputs = self.output_layer(x)
else:
outputs = x
# Reorder output if needed
if reorder_output:
# Scatter rows back to their original (pre-sort) positions
reordered = Variable(outputs.data.new(outputs.size()))
reordered[perm_idx] = outputs
outputs = reordered
# Adapt return format if needed
if return_tensor:
outputs = outputs.data
if return_numpy:
outputs = outputs.data.numpy()
if self.return_attention:
return outputs, att_weights
else:
return outputs
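
A minimal usage sketch for the input formats accepted above. It is illustrative only: `model` is assumed to be an already-constructed instance of the module that defines this forward() (built elsewhere in model_def.py), and the token ids are made up, with 0 as the padding id.

import numpy as np
import torch
from torch.autograd import Variable

padded = np.array([[1, 74, 195, 43, 0],   # two zero-padded rows of token ids
                   [1, 33,   0,  0, 0]], dtype='int64')

out_np  = model(padded)                               # Numpy array in -> Numpy array out
out_t   = model(torch.LongTensor(padded))             # LongTensor in  -> Tensor out
out_var = model(Variable(torch.LongTensor(padded)))   # Variable in    -> Variable out
# If model.return_attention is True, each call instead returns (outputs, att_weights)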