in detic/modeling/roi_heads/detic_fast_rcnn.py [0:0]
def image_label_losses(self, predictions, proposals, image_labels,
                       classifier_info=(None, None, None), ann_type='image'):
    '''
    Compute the image-level (weakly supervised) loss for a batch.

    Inputs:
        predictions: indexable; predictions[0] holds the classification
            scores, documented by the original author as N x (C + 1)
            (N = total proposals over the batch). predictions[2] is read
            as per-proposal scores only when self.with_softmax_prop is set.
        proposals: one entry per image; len(p) gives the number of
            proposals and is used to split the scores per image.
            Each entry is expected to expose proposal_boxes and
            image_size (presumably detectron2 Instances -- confirm).
        image_labels: one iterable of labels per image (documented by the
            original author as B x 1).
        classifier_info: forwarded to self._caption_loss; when
            self.dynamic_classifier is set, classifier_info[1][1] is used
            to remap each label.
        ann_type: annotation type. Any value containing 'caption' adds the
            caption loss; the exact value 'caption' skips the image-label
            loss for every image.

    Returns:
        dict with 'image_loss' (batch-averaged loss scaled by
        self.image_loss_weight) plus zero-valued 'loss_cls' and
        'loss_box_reg' placeholders.

    Side effects:
        Writes scalars to the current event storage. The selection
        statistics are overwritten on every label, so the logged values
        come from the last label processed. With self.debug set, a
        'selected' field is attached to each proposals entry.
    '''
    num_inst_per_image = [len(p) for p in proposals]
    scores = predictions[0].split(num_inst_per_image, dim=0)  # B x n x (C + 1)
    if self.with_softmax_prop:
        prop_scores = predictions[2].split(num_inst_per_image, dim=0)
    else:
        prop_scores = [None for _ in num_inst_per_image]
    num_images = len(scores)

    # Logging statistics; each keeps the value from the last processed label.
    img_box_count = 0
    select_size_count = 0
    select_x_count = 0
    select_y_count = 0
    max_score_count = 0
    storage = get_event_storage()

    # Two separate accumulators: both are updated in place below.
    loss = scores[0].new_zeros([1])[0]
    caption_loss = scores[0].new_zeros([1])[0]

    for idx, (score, labels, prop_score, p) in enumerate(zip(
            scores, image_labels, prop_scores, proposals)):
        if score.shape[0] == 0:
            # An image without proposals contributes nothing to the loss.
            continue
        if 'caption' in ann_type:
            score, caption_loss_img = self._caption_loss(
                score, classifier_info, idx, num_images)
            caption_loss += self.caption_weight * caption_loss_img
            if ann_type == 'caption':
                # Caption-only supervision: no image-label loss.
                continue

        if self.debug:
            # -1 marks proposals not selected for any label.
            p.selected = score.new_zeros(
                (len(p),), dtype=torch.long) - 1

        for i_l, label in enumerate(labels):
            if self.dynamic_classifier:
                if idx == 0 and i_l == 0 and comm.is_main_process():
                    storage.put_scalar('stats_label', label)
                # Remap the label through the lookup in classifier_info.
                label = classifier_info[1][1][label]
                assert label < score.shape[1]

            if self.image_label_loss in ['wsod', 'wsddn']:
                loss_i, ind = self._wsddn_loss(score, prop_score, label)
            elif self.image_label_loss == 'max_score':
                loss_i, ind = self._max_score_loss(score, label)
            elif self.image_label_loss == 'max_size':
                loss_i, ind = self._max_size_loss(score, label, p)
            elif self.image_label_loss == 'first':
                loss_i, ind = self._first_loss(score, label)
            elif self.image_label_loss == 'image':
                loss_i, ind = self._image_loss(score, label)
            elif self.image_label_loss == 'min_loss':
                loss_i, ind = self._min_loss_loss(score, label)
            else:
                # Explicit raise (not `assert 0`) so the check survives -O.
                raise AssertionError(
                    'Unknown image_label_loss: {!r}'.format(
                        self.image_label_loss))

            # Average over the labels of this image; kept in place so the
            # accumulator dtype does not change.
            loss += loss_i / len(labels)

            if isinstance(ind, list):
                # Several proposals were selected: log their mean index.
                img_box_count = sum(ind) / len(ind)
                if self.debug:
                    for ind_i in ind:
                        p.selected[ind_i] = label
            else:
                # A single proposal was selected: log its relative area,
                # its score for the label, and its normalised box centre.
                img_box_count = ind
                select_size_count = p[ind].proposal_boxes.area() / \
                    (p.image_size[0] * p.image_size[1])
                max_score_count = score[ind, label].sigmoid()
                select_x_count = (p.proposal_boxes.tensor[ind, 0] +
                                  p.proposal_boxes.tensor[ind, 2]) \
                    / 2 / p.image_size[1]
                select_y_count = (p.proposal_boxes.tensor[ind, 1] +
                                  p.proposal_boxes.tensor[ind, 3]) \
                    / 2 / p.image_size[0]
                if self.debug:
                    p.selected[ind] = label

    loss = loss / num_images
    storage.put_scalar('stats_l_image', loss.item())
    if 'caption' in ann_type:
        caption_loss = caption_loss / num_images
        loss = loss + caption_loss
        storage.put_scalar('stats_l_caption', caption_loss.item())
    if comm.is_main_process():
        storage.put_scalar('pool_stats', img_box_count)
        storage.put_scalar('stats_select_size', select_size_count)
        storage.put_scalar('stats_select_x', select_x_count)
        storage.put_scalar('stats_select_y', select_y_count)
        storage.put_scalar('stats_max_label_score', max_score_count)

    # Zero placeholders are built from scores[0] rather than from the
    # loop variable, which may be rebound or never assigned.
    return {
        'image_loss': loss * self.image_loss_weight,
        'loss_cls': scores[0].new_zeros([1])[0],
        'loss_box_reg': scores[0].new_zeros([1])[0]}