src/modeling/res_encoder.py
# Module-level imports implied by this method (the rest of the file is not
# shown in this excerpt). `torch_util` is the repo's own utility module; its
# exact import path is assumed here.
import torch
from torch.nn import CrossEntropyLoss, MSELoss

import torch_util  # assumed import; provides avg_along_time / max_along_time

def forward(self, input_ids, attention_mask, labels=None):
    # Per-example sequence lengths, recovered from the padding mask.
    # (A legacy two-sentence path also clamped lengths to self.max_l and
    # truncated s1/s2 to max_l timesteps; that dead code is omitted here.)
    batch_l_1 = torch.sum(attention_mask, dim=1)

    # Embed the token ids, then mean-pool each sequence over its valid
    # timesteps. The variable name is legacy: this is average pooling,
    # not a max-over-time ("maxout") readout.
    embedding_1 = self.Embd(input_ids)
    s1_layer3_maxout = torch_util.avg_along_time(embedding_1, batch_l_1)
    # Single-sentence setup: only the last layer's pooled vector is used as
    # the feature representation. (The legacy two-sentence path concatenated
    # [s1, s2, |s1 - s2|, s1 * s2] of the pooled vectors; with a single
    # tensor, torch.cat was a no-op and is dropped.)
    features = s1_layer3_maxout
    logits = self.classifier(features)

    loss = None
    if labels is not None:
        if self.num_labels == 1:
            # We are doing regression: a single output trained with MSE.
            loss_fct = MSELoss()
            loss = loss_fct(logits.view(-1), labels.view(-1))
        else:
            # Classification: cross-entropy over num_labels classes.
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    return (loss, logits)
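
# For reference, a minimal sketch of the pooling helper this method calls.
# torch_util.avg_along_time is not shown in this excerpt; the implementation
# below is an assumption (a masked mean over each sequence's valid timesteps),
# not the repo's actual code.
def avg_along_time(inputs, lengths):
    # inputs: (batch, time, dim) embeddings; lengths: (batch,) valid lengths.
    time_steps = torch.arange(inputs.size(1), device=inputs.device)
    mask = (time_steps[None, :] < lengths[:, None]).to(inputs.dtype)  # (batch, time)
    summed = (inputs * mask.unsqueeze(-1)).sum(dim=1)                 # (batch, dim)
    return summed / lengths.clamp(min=1).unsqueeze(-1).to(inputs.dtype)

# Sanity-check usage (shapes only):
#   pooled = avg_along_time(torch.randn(4, 16, 300), torch.tensor([16, 9, 5, 1]))
#   pooled.shape == (4, 300)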