in FasterRCNNDetection/trainer.py [0:0]
def forward(self, imgs, bboxes, labels, scale):
    """Forward Faster R-CNN and calculate losses.

    Here are notations used.

    * :math:`N` is the batch size.
    * :math:`R` is the number of bounding boxes per image.

    Currently, only :math:`N=1` is supported. An image without any
    ground-truth box is handled: both localization losses are set to
    zero and every anchor is labelled as background (0) for the RPN
    classification loss.

    As a side effect, ``self.rpn_cm`` and ``self.roi_cm`` are updated
    through their ``add`` method with the scores and targets of this step.

    Args:
        imgs (~torch.autograd.Variable): A variable with a batch of images.
        bboxes (~torch.autograd.Variable): A batch of bounding boxes.
            Its shape is :math:`(N, R, 4)`.
        labels (~torch.autograd.Variable): A batch of labels.
            Its shape is :math:`(N, R)`. The background is excluded from
            the definition, which means that the range of the value
            is :math:`[0, L - 1]`. :math:`L` is the number of foreground
            classes.
        scale (float): Amount of scaling applied to
            the raw image during preprocessing.

    Returns:
        LossTuple: namedtuple of 5 losses, passed positionally in this
        order: RPN localization, RPN classification, ROI localization,
        ROI classification, and the sum of the four.

    Raises:
        ValueError: If the batch size is not 1.
    """
    n = bboxes.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, H, W = imgs.shape
    img_size = (H, W)

    features = self.faster_rcnn.extractor(imgs)
    rpn_locs, rpn_scores, rois, _roi_indices, anchor = \
        self.faster_rcnn.rpn(features, img_size, scale)

    # Since batch size is one, convert variables to singular form
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    # Sample RoIs and forward
    # it's fine to break the computation graph of rois,
    # consider them as constant input
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi,
        at.tonumpy(bbox),
        at.tonumpy(label),
        self.loc_normalize_mean,
        self.loc_normalize_std)
    # NOTE it's all zero because now it only support for batch=1 now
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.faster_rcnn.head(
        features,
        sample_roi,
        sample_roi_index)

    # ------------------ RPN losses -------------------#
    # Always an int: a 0-d ``bbox`` is treated as "no ground-truth boxes"
    # instead of leaving a ``torch.Size`` that cannot be compared to 0.
    n_bbox = bbox.shape[0] if len(bbox.shape) > 0 else 0

    if n_bbox > 0:
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)
    else:
        # if no bboxes, should have no rpn loc loss
        rpn_loc_loss = t.tensor(0.)
        if opt.use_cuda:
            rpn_loc_loss = rpn_loc_loss.cuda()
        # if no bboxes, all region labels are 0 (background)
        gt_rpn_label = t.zeros(anchor.shape[0], dtype=t.long)

    rpn_label_np = at.tonumpy(gt_rpn_label)
    rpn_keep = rpn_label_np > -1  # -1 marks anchors that are ignored

    rpn_class_weights = None
    if opt.reduce_bg_weight:
        # Reweight foreground / background for the case we couldn't sample
        # identical numbers. Count class 0 and class 1 explicitly so the
        # weight vector always has one entry per RPN class, even when a
        # class (or the ignore label) is absent from this image.
        counts = np.bincount(
            rpn_label_np[rpn_keep].astype(np.int64), minlength=2
        ).astype(np.float64)
        inverse = np.zeros_like(counts)
        present = counts > 0
        inverse[present] = 1.0 / counts[present]
        total = inverse.sum()
        if total > 0:
            rpn_class_weights = t.from_numpy(inverse / total * 2).float()

    rpn_target = gt_rpn_label
    if opt.use_cuda:
        rpn_target = rpn_target.cuda()
        if rpn_class_weights is not None:
            rpn_class_weights = rpn_class_weights.cuda()
    # NOTE: default value of ignore_index is -100 ...
    rpn_cls_loss = F.cross_entropy(
        rpn_score, rpn_target, weight=rpn_class_weights, ignore_index=-1)

    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
    _rpn_score = at.tonumpy(rpn_score)[rpn_keep]
    self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

    # ------------------ ROI losses (fast rcnn loss) -------------------#
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    sample_idx = t.arange(0, n_sample).long()
    if opt.use_cuda:
        sample_idx = sample_idx.cuda()
    # For every sampled RoI pick the 4 offsets of its target class.
    roi_loc = roi_cls_loc[sample_idx, at.totensor(gt_roi_label).long()]
    gt_roi_label = at.tovariable(gt_roi_label).long()
    gt_roi_loc = at.tovariable(gt_roi_loc)

    if n_bbox > 0:
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)
    else:
        # no roi loc loss if no gt bboxes
        roi_loc_loss = t.tensor(0.)
        if opt.use_cuda:
            roi_loc_loss = roi_loc_loss.cuda()

    if opt.reduce_bg_weight:
        # Background (class 0) is scaled by 1 / number of sampled RoIs;
        # every foreground class keeps weight 1.
        bg_weight = 1.0 / gt_roi_label.size()[0]
        class_weights = t.FloatTensor(
            np.hstack([bg_weight, np.ones((self.n_fg_class,))]))
    else:
        class_weights = None

    roi_target = gt_roi_label
    if opt.use_cuda:
        roi_target = roi_target.cuda()
        if class_weights is not None:
            class_weights = class_weights.cuda()
    roi_cls_loss = nn.CrossEntropyLoss(weight=class_weights)(roi_score, roi_target)

    self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)]
    return LossTuple(*losses)