in src/modeling/res_encoder.py [0:0]
def forward(self, input_ids, attention_mask, labels=None):
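    """Encode a single token sequence with stacked LSTMs plus shortcut
    connections, max-pool over valid time steps, and classify (or regress
    when self.num_labels == 1).

    Returns a (loss, logits) tuple; loss is None when labels is None.
    """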
    # NOTE: this forward pass is a single-sequence adaptation of an originally
    # paired (s1/s2) encoder; the optional max-length truncation and the second
    # sequence's branch from that version are disabled here.

    # Per-example sequence lengths, recovered from the attention mask.
    batch_l_1 = torch.sum(attention_mask, dim=1)

    # Token embeddings for the input sequence.
    embedding_1 = self.Embd(input_ids)

    # First recurrent layer over the padded batch; torch_util.auto_rnn is assumed
    # to handle packing/unpacking by length (see the hedged sketch after this method).
    s1_layer1_out = torch_util.auto_rnn(self.lstm, embedding_1, batch_l_1)

    # Residual-style shortcut connections: each later layer sees the original
    # embeddings concatenated with the (summed) outputs of the earlier layers.
    s1_layer2_in = torch.cat([embedding_1, s1_layer1_out], dim=2)
    s1_layer2_out = torch_util.auto_rnn(self.lstm_1, s1_layer2_in, batch_l_1)

    s1_layer3_in = torch.cat([embedding_1, s1_layer1_out + s1_layer2_out], dim=2)
    s1_layer3_out = torch_util.auto_rnn(self.lstm_2, s1_layer3_in, batch_l_1)

    # Max-pool the last layer's outputs over valid time steps only
    # (see the hedged max_along_time sketch after this method).
    s1_layer3_maxout = torch_util.max_along_time(s1_layer3_out, batch_l_1)

    # Only the last layer's max-pooled vector is used as the feature. The paired
    # version combined both sequences here ([u, v, |u - v|, u * v]); with a single
    # sequence, torch.cat over one tensor is a no-op, so use the vector directly.
    features = s1_layer3_maxout

    logits = self.classifier(features)

    loss = None
    if labels is not None:
        if self.num_labels == 1:
            # We are doing regression
            loss_fct = MSELoss()
            loss = loss_fct(logits.view(-1), labels.view(-1))
        else:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

    return (loss, logits)
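

# ---------------------------------------------------------------------------
# Hedged reference sketch (assumption, not the project's code): torch_util.auto_rnn
# is not shown in this excerpt. Based on how forward() calls it, it is assumed to run
# the given RNN over a padded sequence tensor using per-example lengths, e.g. via
# pack_padded_sequence / pad_packed_sequence. The batch-first tensor layout below is
# an assumption and may differ from the project's convention.
# ---------------------------------------------------------------------------
def _auto_rnn_sketch(rnn, seqs, lengths):
    from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

    # seqs: [batch, time, dim]; lengths: [batch], e.g. attention_mask.sum(dim=1)
    packed = pack_padded_sequence(
        seqs, lengths.cpu(), batch_first=True, enforce_sorted=False
    )
    packed_out, _ = rnn(packed)
    out, _ = pad_packed_sequence(
        packed_out, batch_first=True, total_length=seqs.size(1)
    )
    return out  # [batch, time, hidden_size * num_directions]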
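

# Hedged reference sketch (assumption, not the project's code):
# torch_util.max_along_time is assumed to max-pool over valid (non-padded) time
# steps only, so padding never contributes to the pooled feature vector.
def _max_along_time_sketch(seqs, lengths):
    import torch

    # seqs: [batch, time, dim]; lengths: [batch]
    steps = torch.arange(seqs.size(1), device=seqs.device)
    valid = steps.unsqueeze(0) < lengths.unsqueeze(1)             # [batch, time]
    masked = seqs.masked_fill(~valid.unsqueeze(-1), float("-inf"))
    return masked.max(dim=1).values                               # [batch, dim]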
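

# Hedged usage sketch: the encoder's constructor is not shown in this excerpt, so the
# instantiation below is illustrative only (class name and arguments are assumptions).
#
#     model = ResEncoder(...)                       # constructor args not shown here
#     input_ids = torch.randint(0, vocab_size, (batch, seq_len))
#     attention_mask = torch.ones(batch, seq_len, dtype=torch.long)
#     loss, logits = model(input_ids, attention_mask, labels=labels)
#
# With labels=None the first element of the returned tuple is None and only the
# logits are meaningful.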