# forward() — method of the residual-LSTM encoder
# (source: src/modeling/res_encoder.py)

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode one sequence with a 3-layer residual-shortcut LSTM stack and classify it.

        Each LSTM layer after the first receives the raw embeddings concatenated
        with the (residual-summed) outputs of the previous layers; the final
        layer's output is max-pooled over valid time steps and fed to the
        classifier head.

        Args:
            input_ids: token-id tensor — assumed shape (batch, seq_len); TODO confirm.
            attention_mask: 0/1 mask with the same shape as ``input_ids``;
                its per-row sum is used as the true sequence length.
            labels: optional targets. Float targets for regression when
                ``self.num_labels == 1``, class indices otherwise.

        Returns:
            Tuple ``(loss, logits)``; ``loss`` is None when ``labels`` is None.
        """
        # True (unpadded) length of every sequence, derived from the mask.
        seq_lengths = torch.sum(attention_mask, dim=1)

        embeddings = self.Embd(input_ids)

        # Layer 1: plain LSTM over the embeddings.
        layer1_out = torch_util.auto_rnn(self.lstm, embeddings, seq_lengths)

        # Layer 2: highway-style shortcut — embeddings concatenated with layer-1 output.
        layer2_in = torch.cat([embeddings, layer1_out], dim=2)
        layer2_out = torch_util.auto_rnn(self.lstm_1, layer2_in, seq_lengths)

        # Layer 3: residual sum of the two previous layers, again with the
        # embedding shortcut on the feature dimension.
        layer3_in = torch.cat([embeddings, layer1_out + layer2_out], dim=2)
        layer3_out = torch_util.auto_rnn(self.lstm_2, layer3_in, seq_lengths)

        # Max-pool the last layer over valid time steps only.
        # (The original wrapped this in torch.cat([x], dim=1) — a no-op left
        # over from a removed two-sentence pair model; dropped here.)
        features = torch_util.max_along_time(layer3_out, seq_lengths)

        logits = self.classifier(features)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                # A single output unit means we are doing regression.
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return (loss, logits)