in models/official/mask_rcnn/mask_rcnn_model.py [0:0]
def build_model_graph(features, labels, is_training, params):
  """Builds the forward model graph for Mask R-CNN (train or inference).

  Constructs backbone -> FPN -> RPN -> RoIAlign -> box head (and optionally
  mask head), returning a dict of tensors whose contents depend on
  `is_training` and `params['include_mask']`.

  Args:
    features: dict of input tensors. Must contain 'images' and 'image_info';
      'source_ids' is filled with -1s when absent. May be mutated in place
      (the 'images' entry is transposed when `params['transpose_input']`).
    labels: dict of groundtruth tensors. Only read when `is_training` is
      True: 'gt_boxes', 'gt_classes', and (when masks are enabled)
      'cropped_gt_masks'.
    is_training: bool, whether to build the training graph (proposal
      sampling, target encoding) or the inference graph (detection
      generation).
    params: dict of hyperparameters (backbone choice, anchor config, NMS
      thresholds, head dimensions, etc.).

  Returns:
    model_outputs: dict of tensors. Always contains 'fpn_features'.
    Training adds RPN/box (and mask) outputs plus their targets; inference
    adds 'num_detections', 'detection_boxes', 'detection_classes',
    'detection_scores' (and 'detection_masks' when masks are enabled).

  Raises:
    ValueError: if `params['backbone']` is neither a resnet nor a mnasnet
      variant.
  """
  # Batched NMS is a GPU/CPU-only path; on TPU the TPU-specific ops are used.
  use_batched_nms = (not params['use_tpu'] and params['use_batched_nms'])
  is_gpu_inference = (not is_training and use_batched_nms)
  model_outputs = {}

  if is_training and params['transpose_input']:
    # Undo the host-side input transpose so the model sees NHWC.
    # NOTE(review): the permutation differs with space-to-depth — presumably
    # the host packs the images differently in that mode; confirm against the
    # input pipeline.
    if (params['backbone'].startswith('resnet') and
        params['conv0_space_to_depth_block_size'] > 0):
      features['images'] = tf.transpose(features['images'], [2, 0, 1, 3])
    else:
      features['images'] = tf.transpose(features['images'], [3, 0, 1, 2])

  batch_size, image_height, image_width, _ = (
      features['images'].get_shape().as_list())

  # The space-to-depth transform trades spatial resolution for channels in
  # the input pipeline; scale height/width back up so anchors are generated
  # for the original image resolution (hence the multiplications below).
  conv0_space_to_depth_block_size = 0
  if (is_training and
      (params['backbone'].startswith('resnet') and
       params['conv0_space_to_depth_block_size'] > 0)):
    conv0_space_to_depth_block_size = params['conv0_space_to_depth_block_size']
    image_height *= conv0_space_to_depth_block_size
    image_width *= conv0_space_to_depth_block_size

  if 'source_ids' not in features:
    # Sentinel ids for inputs (e.g. padded examples) that carry no source id.
    features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

  all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                params['num_scales'], params['aspect_ratios'],
                                params['anchor_scale'],
                                (image_height, image_width))

  # Backbone: either a ResNet variant or a MnasNet variant; both yield a dict
  # of per-level feature maps keyed by FPN level.
  if 'resnet' in params['backbone']:
    with tf.variable_scope(params['backbone']):
      resnet_fn = resnet.resnet_v1(
          params['backbone'],
          conv0_kernel_size=params['conv0_kernel_size'],
          conv0_space_to_depth_block_size=conv0_space_to_depth_block_size,
          num_batch_norm_group=params['num_batch_norm_group'])
      backbone_feats = resnet_fn(
          features['images'],
          (params['is_training_bn'] and is_training))
  elif 'mnasnet' in params['backbone']:
    with tf.variable_scope(params['backbone']):
      _, endpoints = mnasnet_models.build_mnasnet_base(
          features['images'],
          params['backbone'],
          training=(params['is_training_bn'] and is_training),
          override_params={'use_keras': False})

      # Map MnasNet reduction endpoints to FPN input levels 2..5.
      backbone_feats = {
          2: endpoints['reduction_2'],
          3: endpoints['reduction_3'],
          4: endpoints['reduction_4'],
          5: endpoints['reduction_5'],
      }
  else:
    raise ValueError('Not a valid backbone option: %s' % params['backbone'])

  fpn_feats = fpn.fpn(
      backbone_feats, params['min_level'], params['max_level'])
  model_outputs.update({
      'fpn_features': fpn_feats,
  })

  # NOTE(review): `len(aspect_ratios * num_scales)` relies on list
  # repetition; it equals len(aspect_ratios) * num_scales, so the value is
  # correct even though the parenthesization looks off.
  rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
      fpn_feats,
      params['min_level'], params['max_level'],
      len(params['aspect_ratios'] * params['num_scales']))

  # Train and eval use different proposal budgets.
  if is_training:
    rpn_pre_nms_topn = params['rpn_pre_nms_topn']
    rpn_post_nms_topn = params['rpn_post_nms_topn']
  else:
    rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
    rpn_post_nms_topn = params['test_rpn_post_nms_topn']

  rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
      rpn_score_outputs,
      rpn_box_outputs,
      all_anchors,
      features['image_info'],
      rpn_pre_nms_topn,
      rpn_post_nms_topn,
      params['rpn_nms_threshold'],
      params['rpn_min_size'],
      bbox_reg_weights=None,
      use_batched_nms=use_batched_nms)
  rpn_box_rois = tf.to_float(rpn_box_rois)
  if is_training:
    # Proposals are treated as fixed inputs to the second stage: no gradient
    # flows back into the RPN through the RoIs/scores.
    rpn_box_rois = tf.stop_gradient(rpn_box_rois)
    rpn_box_scores = tf.stop_gradient(rpn_box_scores)

  if is_training:
    # Sampling: assign groundtruth to proposals and subsample a fixed-size
    # fg/bg batch per image for the box head.
    box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
        training_ops.proposal_label_op(
            rpn_box_rois,
            labels['gt_boxes'],
            labels['gt_classes'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo']))

  # Performs multi-level RoIAlign.
  box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
      fpn_feats, rpn_box_rois, output_size=7, is_gpu_inference=is_gpu_inference)

  class_outputs, box_outputs, _ = heads.box_head(
      box_roi_features, num_classes=params['num_classes'],
      mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

  if not is_training:
    # Inference: decode box-head outputs into final detections. The TPU and
    # GPU implementations share the same output tuple layout.
    if is_gpu_inference:
      generate_detections_fn = postprocess_ops.generate_detections_gpu
    else:
      generate_detections_fn = postprocess_ops.generate_detections_tpu
    detections = generate_detections_fn(
        class_outputs,
        box_outputs,
        rpn_box_rois,
        features['image_info'],
        params['test_rpn_post_nms_topn'],
        params['test_detections_per_image'],
        params['test_nms'],
        params['bbox_reg_weights'])

    model_outputs.update({
        'num_detections': detections[0],
        'detection_boxes': detections[1],
        'detection_classes': detections[2],
        'detection_scores': detections[3],
    })
  else:
    # Training: encode regression targets in the same parameterization the
    # box head predicts, and expose raw outputs + targets for the loss.
    encoded_box_targets = training_ops.encode_box_targets(
        rpn_box_rois, box_targets, class_targets, params['bbox_reg_weights'])
    model_outputs.update({
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
        'class_targets': class_targets,
        'box_targets': encoded_box_targets,
        'box_rois': rpn_box_rois,
    })

  # Faster-RCNN mode.
  if not params['include_mask']:
    # Print #parameters and #FLOPs in model.
    compute_model_statistics(batch_size, is_training=is_training)
    return model_outputs

  # Mask sampling
  if not is_training:
    # Inference: run the mask head only on the final detections.
    selected_box_rois = model_outputs['detection_boxes']
    class_indices = model_outputs['detection_classes']
    # If using GPU for inference, delay the cast until when Gather ops show up
    # since GPU inference supports float point better.
    # TODO(laigd): revisit this when newer versions of GPU libraries is
    # released.
    if not is_gpu_inference:
      class_indices = tf.to_int32(class_indices)
  else:
    # Training: the mask loss is computed only on foreground proposals.
    (selected_class_targets, selected_box_targets, selected_box_rois,
     proposal_to_label_map) = (
         training_ops.select_fg_for_masks(
             class_targets, box_targets, rpn_box_rois,
             proposal_to_label_map,
             max_num_fg=int(
                 params['batch_size_per_im'] * params['fg_fraction'])))
    class_indices = tf.to_int32(selected_class_targets)

  mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
      fpn_feats,
      selected_box_rois,
      output_size=14,
      is_gpu_inference=is_gpu_inference)
  mask_outputs = heads.mask_head(
      mask_roi_features,
      class_indices,
      num_classes=params['num_classes'],
      mrcnn_resolution=params['mrcnn_resolution'],
      is_gpu_inference=is_gpu_inference)

  # Print #parameters and #FLOPs in model.
  compute_model_statistics(batch_size, is_training=is_training)

  if is_training:
    mask_targets = training_ops.get_mask_targets(
        selected_box_rois, proposal_to_label_map, selected_box_targets,
        labels['cropped_gt_masks'], params['mrcnn_resolution'])
    model_outputs.update({
        'mask_outputs': mask_outputs,
        'mask_targets': mask_targets,
        'selected_class_targets': selected_class_targets,
    })
  else:
    # Logits -> per-pixel probabilities for the final masks.
    model_outputs.update({
        'detection_masks': tf.nn.sigmoid(mask_outputs),
    })

  return model_outputs