def image_label_losses()

in detic/modeling/roi_heads/detic_fast_rcnn.py [0:0]


    def image_label_losses(self, predictions, proposals, image_labels,
                           classifier_info=(None, None, None),
                           ann_type='image'):
        '''
        Compute the image-level supervision loss for a batch of images.

        Inputs:
            predictions: indexable container. predictions[0] holds the
                stacked per-proposal scores, N x (C + 1); it is split back
                into per-image chunks using the proposal counts.
                predictions[2] is read (and split the same way) only when
                self.with_softmax_prop is set.
            proposals: one entry per image; len(p) is the number of
                proposals of that image. Entries are also passed to the
                'max_size' loss and read for the box statistics.
            image_labels: one iterable of labels per image (B entries).
            classifier_info: forwarded to self._caption_loss; when
                self.dynamic_classifier is set, classifier_info[1][1] is
                indexed by each label to remap it before use.
            ann_type: if it contains 'caption', a caption loss is added;
                if it is exactly 'caption', the per-label image loss is
                skipped for every image.

        Returns:
            dict with 'image_loss' (the batch-averaged loss scaled by
            self.image_loss_weight) and zero-valued 'loss_cls' and
            'loss_box_reg' entries.

        Raises:
            ValueError: if self.image_label_loss is not one of the
                supported loss names.

        Side effects:
            Writes scalars to the current event storage. The selection
            statistics are overwritten on every label, so only the values
            from the last processed label are logged.
        '''
        num_inst_per_image = [len(p) for p in proposals]
        scores = predictions[0].split(
            num_inst_per_image, dim=0)  # B x n x (C + 1)
        if self.with_softmax_prop:
            prop_scores = predictions[2].split(num_inst_per_image, dim=0)
        else:
            prop_scores = [None] * len(num_inst_per_image)
        B = len(scores)

        # Logging-only statistics; each one keeps the most recent value.
        img_box_count = 0
        select_size_count = 0
        select_x_count = 0
        select_y_count = 0
        max_score_count = 0

        storage = get_event_storage()
        has_caption = 'caption' in ann_type
        loss = scores[0].new_zeros([1])[0]
        caption_loss = scores[0].new_zeros([1])[0]

        for idx, (score, labels, prop_score, p) in enumerate(zip(
                scores, image_labels, prop_scores, proposals)):
            if score.shape[0] == 0:
                # No proposals for this image: it contributes zero loss.
                continue
            if has_caption:
                # _caption_loss also returns the scores to use from here on.
                score, caption_loss_img = self._caption_loss(
                    score, classifier_info, idx, B)
                caption_loss += self.caption_weight * caption_loss_img
                if ann_type == 'caption':
                    continue

            if self.debug:
                # -1 marks proposals that were not selected for any label.
                p.selected = score.new_zeros(
                    (len(p),), dtype=torch.long) - 1

            for i_l, label in enumerate(labels):
                if self.dynamic_classifier:
                    if idx == 0 and i_l == 0 and comm.is_main_process():
                        storage.put_scalar('stats_label', label)
                    label = classifier_info[1][1][label]
                    assert label < score.shape[1]

                loss_name = self.image_label_loss
                if loss_name in ['wsod', 'wsddn']:
                    loss_i, ind = self._wsddn_loss(score, prop_score, label)
                elif loss_name == 'max_score':
                    loss_i, ind = self._max_score_loss(score, label)
                elif loss_name == 'max_size':
                    loss_i, ind = self._max_size_loss(score, label, p)
                elif loss_name == 'first':
                    loss_i, ind = self._first_loss(score, label)
                elif loss_name == 'image':
                    loss_i, ind = self._image_loss(score, label)
                elif loss_name == 'min_loss':
                    loss_i, ind = self._min_loss_loss(score, label)
                else:
                    # Explicit raise: an assert would be stripped under -O
                    # and leave loss_i undefined.
                    raise ValueError(
                        'Unsupported image_label_loss: {!r}'.format(
                            loss_name))

                # Average over the labels of this image.
                loss += loss_i / len(labels)

                if isinstance(ind, list):
                    # The loss returned a list of values rather than a
                    # single proposal index; log their mean.
                    img_box_count = sum(ind) / len(ind)
                    if self.debug:
                        for ind_i in ind:
                            p.selected[ind_i] = label
                else:
                    # A single proposal index was selected: record its
                    # relative area, score and normalized box center.
                    img_h, img_w = p.image_size[0], p.image_size[1]
                    boxes = p.proposal_boxes.tensor
                    img_box_count = ind
                    select_size_count = p[ind].proposal_boxes.area() / \
                        (img_h * img_w)
                    max_score_count = score[ind, label].sigmoid()
                    select_x_count = \
                        (boxes[ind, 0] + boxes[ind, 2]) / 2 / img_w
                    select_y_count = \
                        (boxes[ind, 1] + boxes[ind, 3]) / 2 / img_h
                    if self.debug:
                        p.selected[ind] = label

        loss = loss / B
        storage.put_scalar('stats_l_image', loss.item())
        if has_caption:
            caption_loss = caption_loss / B
            loss = loss + caption_loss
            storage.put_scalar('stats_l_caption', caption_loss.item())
        if comm.is_main_process():
            storage.put_scalar('pool_stats', img_box_count)
            storage.put_scalar('stats_select_size', select_size_count)
            storage.put_scalar('stats_select_x', select_x_count)
            storage.put_scalar('stats_select_y', select_y_count)
            storage.put_scalar('stats_max_label_score', max_score_count)

        # Build the zero placeholders from scores[0] instead of relying on
        # the loop variable leaking out of the for-loop.
        return {
            'image_loss': loss * self.image_loss_weight,
            'loss_cls': scores[0].new_zeros([1])[0],
            'loss_box_reg': scores[0].new_zeros([1])[0]}