in detectron/roi_data/retinanet.py [0:0]
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Fill `blobs` with the RetinaNet training targets of one minibatch.

    One field of anchors is built for every (FPN level, octave, aspect ratio)
    combination. For each roidb entry the per-field blobs returned by
    `_get_retinanet_blobs` are appended to `blobs` under the key
    '<blob name>_fpn<level>', and the lists gathered this way are finally
    concatenated into numpy arrays.

    Args:
        blobs: dict updated in place. `blobs[key]` must already yield a list
            for every key that is appended to ('im_info' and the per-level
            RetinaNet keys).
        im_scales: per-image scale factors, indexed like `roidb`.
        roidb: sequence of entries providing 'height', 'width', 'boxes',
            'gt_classes' and 'is_crowd'.
        image_width: forwarded unchanged to `_get_retinanet_blobs`.
        image_height: forwarded unchanged to `_get_retinanet_blobs`.

    Returns:
        True, unconditionally.

    Raises:
        AssertionError: if an entry has no ground-truth box that is both
            foreground (class > 0) and not a crowd region.
    """
    # RetinaNet predicts on several feature levels, as in the FPN paper.
    min_level, max_level = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    octaves = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    num_aspect_ratios = len(aspect_ratios)
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # One field of anchors per (level, octave, aspect ratio) triple.
    foas = []
    for fpn_level in range(min_level, max_level + 1):
        stride = 2. ** fpn_level
        for octave in range(octaves):
            octave_scale = 2 ** (octave / float(octaves))
            anchor_sizes = (stride * octave_scale * anchor_scale, )
            for ratio_idx, ratio in enumerate(aspect_ratios):
                foas.append(
                    data_utils.get_field_of_anchors(
                        stride, anchor_sizes, (ratio, ), octave, ratio_idx))
    all_anchors = np.concatenate([foa.field_of_anchors for foa in foas])

    blobs['retnet_fg_num'] = 0.0
    blobs['retnet_bg_num'] = 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)

        # Keep only foreground, non-crowd ground-truth boxes.
        is_usable_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        gt_inds = np.where(is_usable_gt)[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth empty for image is not allowed. Please check.'
        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32))

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for blob_name, blob in retinanet_blobs[i].items():
                # Blobs are stacked image after image:
                # [[anchors of image 1] + [anchors of image 2] + ...]
                fpn_level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(blob_name, fpn_level)
                if blob_name == 'retnet_roi_fg_bbox_locs':
                    # Column 0 holds the index of the image in the batch.
                    blob[:, 0] = im_i
                    # A box prediction has 4 coordinates; with class-specific
                    # regression every anchor owns 4 per foreground class
                    # (80 * 4 instead of 4).
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride = 4 * (cfg.MODEL.NUM_CLASSES - 1)
                    else:
                        loc_stride = 4
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # Column 1 arrives as the class label (range 0-80) for
                    # class-specific boxes and 0 otherwise. Turn it into the
                    # channel offset: class_label * 4 inside the anchor's
                    # slice, shifted by the slice start of this anchor.
                    blob[:, 1] *= 4
                    blob[:, 1] += loc_stride * anchor_ind
                blobs[key].append(blob)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    for count_key in ('retnet_fg_num', 'retnet_bg_num'):
        blobs[count_key] = blobs[count_key].astype(np.float32)

    num_images = len(roidb)
    for blob_name, blob_list in blobs.items():
        # Only non-empty lists still need to be merged into arrays.
        if not isinstance(blob_list, list) or len(blob_list) == 0:
            continue
        # Number of list elements contributed by each image.
        per_image = len(blob_list) // num_images
        if 'retnet_cls_labels' in blob_name:
            # Classification labels (per FPN level): the list has N x A
            # elements, N = number of images and A = number of anchors
            # (e.g. N = 2, A = 9), each of shape 1 x 1 x H x W where H and W
            # are the spatial size of the current FPN level. With a{i} the
            # element of anchor i, the list order is
            # [[a0, ..., a8], [a0, ..., a8]], whereas the network predicts
            # 2 x (9 * 80) x H x W. So the anchors of each image are joined
            # along axis 1 first, and the images along axis 0 afterwards,
            # which yields 2 x 9 x H x W.
            stacked_per_image = [
                np.concatenate(blob_list[start:start + per_image], axis=1)
                for start in range(0, len(blob_list), per_image)
            ]
            blobs[blob_name] = np.concatenate(stacked_per_image, axis=0)
        else:
            # Box regression targets and foreground locations (per FPN
            # level) are M x 4 arrays, M being the number of foreground
            # locations of one image at that level, listed in the same
            # [[a0, ..., a8], [a0, ..., a8]] order. Stack them into a single
            # M x 4 array.
            blobs[blob_name] = np.concatenate(blob_list, axis=0)
    return True