def forward()

in src/modeling/res_encoder.py

Encodes a batch of token sequences by average-pooling their embeddings over
the non-padding positions, classifies the pooled vector, and, when labels are
given, computes an MSE loss for regression (num_labels == 1) or a
cross-entropy loss for classification.

    def forward(self, input_ids, attention_mask, labels=None):
        # Number of real (non-padding) tokens per example; used to average
        # only over the valid positions.
        batch_l_1 = torch.sum(attention_mask, dim=1)

        # Embed the input tokens: (batch, seq_len, embed_dim).
        embedding_1 = self.Embd(input_ids)

        # Length-masked average pooling over the time axis. (Legacy name:
        # despite "maxout", this is mean pooling of the embeddings.)
        s1_layer3_maxout = torch_util.avg_along_time(embedding_1, batch_l_1)

        # Single-sentence setup: the pooled vector is the feature vector.
        # (An earlier two-sentence variant concatenated s1, s2, |s1 - s2|,
        # and s1 * s2 here; torch.cat over a single tensor is a no-op.)
        features = s1_layer3_maxout

        # Project the pooled features to the label space.
        logits = self.classifier(features)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                # A single output unit means regression: mean-squared error.
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                # Otherwise classification: cross-entropy over num_labels.
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return (loss, logits)
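
The snippet assumes context that lives elsewhere in the file: the
torch_util.avg_along_time helper, the module's Embd and classifier attributes,
and the MSELoss / CrossEntropyLoss imports from torch.nn. Below is a minimal,
self-contained sketch of how these pieces plausibly fit together. The
avg_along_time body is an assumption inferred from its call site
(length-masked mean pooling), and the ResEncoder skeleton is hypothetical,
reduced to just the attributes forward() touches; neither is the repository's
actual implementation.

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss

def avg_along_time(inputs, lengths):
    # ASSUMED behavior of torch_util.avg_along_time: average each sequence
    # over its first lengths[i] time steps, ignoring padding positions.
    # inputs: (batch, seq_len, dim); lengths: (batch,) token counts.
    mask = (torch.arange(inputs.size(1), device=inputs.device)[None, :]
            < lengths[:, None])                        # (batch, seq_len) bool
    summed = (inputs * mask.unsqueeze(-1)).sum(dim=1)  # zero padding, then sum
    return summed / lengths.clamp(min=1).unsqueeze(-1).float()

class ResEncoder(nn.Module):
    # HYPOTHETICAL skeleton: only what forward() above needs to run.
    def __init__(self, vocab_size=30522, embed_dim=300, num_labels=3):
        super().__init__()
        self.num_labels = num_labels
        self.Embd = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.classifier = nn.Linear(embed_dim, num_labels)

A quick shape check of the pooling helper under these assumptions:

x = torch.randn(2, 5, 8)             # (batch=2, seq_len=5, dim=8)
lengths = torch.tensor([5, 3])       # second sequence has 2 padding steps
pooled = avg_along_time(x, lengths)  # -> (2, 8)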