# build_model_graph
# Extracted from: models/official/mask_rcnn/mask_rcnn_model.py

def build_model_graph(features, labels, is_training, params):
  """Builds the forward model graph.

  Constructs the Mask R-CNN forward pass: backbone (ResNet or MnasNet) ->
  FPN -> RPN -> proposal sampling/generation -> box head, and — unless
  params['include_mask'] is False — the mask head. In training mode the
  returned dict carries raw head outputs plus the sampled targets needed by
  the losses; in eval/inference mode it carries post-processed detections.

  Args:
    features: dict of input tensors. Must contain 'images' and 'image_info';
      'source_ids' is filled with a -1 placeholder when absent. 'images' is
      NHWC, except during training with params['transpose_input'] where it
      arrives in a transposed layout and is transposed back below.
    labels: dict of ground-truth tensors ('gt_boxes', 'gt_classes',
      'cropped_gt_masks'); only read when is_training is True.
    is_training: bool, whether the graph is built for training.
    params: dict of model hyperparameters (backbone choice, FPN levels,
      anchor configuration, RPN/NMS thresholds, head dimensions, etc.).

  Returns:
    model_outputs: dict of output tensors. Training: RPN/box head outputs
    and their targets (plus mask outputs/targets when include_mask). Eval:
    'num_detections', 'detection_boxes', 'detection_classes',
    'detection_scores' (plus 'detection_masks' when include_mask).

  Raises:
    ValueError: if params['backbone'] is neither a resnet nor a mnasnet
      variant.
  """
  # Batched NMS is only usable off-TPU; it also gates the GPU-inference
  # fast paths below (float-typed RoIAlign / GPU detection generation).
  use_batched_nms = (not params['use_tpu'] and params['use_batched_nms'])
  is_gpu_inference = (not is_training and use_batched_nms)
  model_outputs = {}

  # The input pipeline may transpose images for TPU efficiency; undo that
  # here so the rest of the graph sees NHWC. The permutation differs when
  # the conv0 space-to-depth optimization is active because the stored
  # layout differs (presumably HWNC vs. HWCN — confirm against the input
  # pipeline).
  if is_training and params['transpose_input']:
    if (params['backbone'].startswith('resnet') and
        params['conv0_space_to_depth_block_size'] > 0):
      features['images'] = tf.transpose(features['images'], [2, 0, 1, 3])
    else:
      features['images'] = tf.transpose(features['images'], [3, 0, 1, 2])

  # Static (compile-time) shapes; as_list() requires them to be fully
  # defined here.
  batch_size, image_height, image_width, _ = (
      features['images'].get_shape().as_list())

  # Space-to-depth shrinks the spatial dims by block_size in the input
  # pipeline; recover the original image size so anchors are generated for
  # the true image resolution.
  conv0_space_to_depth_block_size = 0
  if (is_training and
      (params['backbone'].startswith('resnet') and
       params['conv0_space_to_depth_block_size'] > 0)):
    conv0_space_to_depth_block_size = params['conv0_space_to_depth_block_size']
    image_height *= conv0_space_to_depth_block_size
    image_width *= conv0_space_to_depth_block_size

  # Placeholder ids when the input pipeline did not supply them.
  if 'source_ids' not in features:
    features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

  # Multi-scale anchors covering every FPN level.
  all_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                params['num_scales'], params['aspect_ratios'],
                                params['anchor_scale'],
                                (image_height, image_width))

  # Backbone: produces a dict of per-level feature maps consumed by FPN.
  if 'resnet' in params['backbone']:
    with tf.variable_scope(params['backbone']):
      resnet_fn = resnet.resnet_v1(
          params['backbone'],
          conv0_kernel_size=params['conv0_kernel_size'],
          conv0_space_to_depth_block_size=conv0_space_to_depth_block_size,
          num_batch_norm_group=params['num_batch_norm_group'])
      backbone_feats = resnet_fn(
          features['images'],
          (params['is_training_bn'] and is_training))
  elif 'mnasnet' in params['backbone']:
    with tf.variable_scope(params['backbone']):
      _, endpoints = mnasnet_models.build_mnasnet_base(
          features['images'],
          params['backbone'],
          training=(params['is_training_bn'] and is_training),
          override_params={'use_keras': False})

      # Map MnasNet reduction endpoints onto the level keys FPN expects.
      backbone_feats = {
          2: endpoints['reduction_2'],
          3: endpoints['reduction_3'],
          4: endpoints['reduction_4'],
          5: endpoints['reduction_5'],
      }
  else:
    raise ValueError('Not a valid backbone option: %s' % params['backbone'])

  # Feature pyramid over [min_level, max_level].
  fpn_feats = fpn.fpn(
      backbone_feats, params['min_level'], params['max_level'])
  model_outputs.update({
      'fpn_features': fpn_feats,
  })

  # RPN head. NOTE: the multiplication happens inside len() — the list is
  # replicated num_scales times — which equals
  # len(aspect_ratios) * num_scales, i.e. anchors per location.
  rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
      fpn_feats,
      params['min_level'], params['max_level'],
      len(params['aspect_ratios'] * params['num_scales']))

  # Proposal budgets differ between training and test time.
  if is_training:
    rpn_pre_nms_topn = params['rpn_pre_nms_topn']
    rpn_post_nms_topn = params['rpn_post_nms_topn']
  else:
    rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
    rpn_post_nms_topn = params['test_rpn_post_nms_topn']

  # Decode RPN outputs into scored RoIs (per-level top-k + NMS).
  rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
      rpn_score_outputs,
      rpn_box_outputs,
      all_anchors,
      features['image_info'],
      rpn_pre_nms_topn,
      rpn_post_nms_topn,
      params['rpn_nms_threshold'],
      params['rpn_min_size'],
      bbox_reg_weights=None,
      use_batched_nms=use_batched_nms)
  rpn_box_rois = tf.to_float(rpn_box_rois)
  if is_training:
    # Proposals are treated as fixed inputs to the second stage: no
    # gradients flow back into the RPN through them.
    rpn_box_rois = tf.stop_gradient(rpn_box_rois)
    rpn_box_scores = tf.stop_gradient(rpn_box_scores)

  if is_training:
    # Sampling: subsample proposals to a fixed fg/bg mix and assign each
    # sampled RoI its class and box regression targets.
    box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
        training_ops.proposal_label_op(
            rpn_box_rois,
            labels['gt_boxes'],
            labels['gt_classes'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo']))

  # Performs multi-level RoIAlign (7x7 output for the box head).
  box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
      fpn_feats, rpn_box_rois, output_size=7, is_gpu_inference=is_gpu_inference)

  class_outputs, box_outputs, _ = heads.box_head(
      box_roi_features, num_classes=params['num_classes'],
      mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

  if not is_training:
    # Post-process box head outputs into final detections; GPU and TPU use
    # different implementations of the same op.
    if is_gpu_inference:
      generate_detections_fn = postprocess_ops.generate_detections_gpu
    else:
      generate_detections_fn = postprocess_ops.generate_detections_tpu
    detections = generate_detections_fn(
        class_outputs,
        box_outputs,
        rpn_box_rois,
        features['image_info'],
        params['test_rpn_post_nms_topn'],
        params['test_detections_per_image'],
        params['test_nms'],
        params['bbox_reg_weights'])

    # Detections tuple order: (num, boxes, classes, scores).
    model_outputs.update({
        'num_detections': detections[0],
        'detection_boxes': detections[1],
        'detection_classes': detections[2],
        'detection_scores': detections[3],
    })
  else:
    # Encode sampled box targets relative to their RoIs for the regression
    # loss.
    encoded_box_targets = training_ops.encode_box_targets(
        rpn_box_rois, box_targets, class_targets, params['bbox_reg_weights'])
    model_outputs.update({
        'rpn_score_outputs': rpn_score_outputs,
        'rpn_box_outputs': rpn_box_outputs,
        'class_outputs': class_outputs,
        'box_outputs': box_outputs,
        'class_targets': class_targets,
        'box_targets': encoded_box_targets,
        'box_rois': rpn_box_rois,
    })

  # Faster-RCNN mode: no mask branch — return after the box head.
  if not params['include_mask']:
    # Print #parameters and #FLOPs in model.
    compute_model_statistics(batch_size, is_training=is_training)

    return model_outputs

  # Mask sampling: pick which RoIs feed the mask head. At inference these
  # are the final detections; at training, the sampled foreground RoIs.
  if not is_training:
    selected_box_rois = model_outputs['detection_boxes']
    class_indices = model_outputs['detection_classes']
    # If using GPU for inference, delay the cast until when Gather ops show up
    # since GPU inference supports float point better.
    # TODO(laigd): revisit this when newer versions of GPU libraries is
    # released.
    if not is_gpu_inference:
      class_indices = tf.to_int32(class_indices)
  else:
    (selected_class_targets, selected_box_targets, selected_box_rois,
     proposal_to_label_map) = (
         training_ops.select_fg_for_masks(
             class_targets, box_targets, rpn_box_rois,
             proposal_to_label_map,
             max_num_fg=int(
                 params['batch_size_per_im'] * params['fg_fraction'])))
    class_indices = tf.to_int32(selected_class_targets)

  # RoIAlign again at the mask head's larger 14x14 resolution.
  mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
      fpn_feats,
      selected_box_rois,
      output_size=14,
      is_gpu_inference=is_gpu_inference)
  mask_outputs = heads.mask_head(
      mask_roi_features,
      class_indices,
      num_classes=params['num_classes'],
      mrcnn_resolution=params['mrcnn_resolution'],
      is_gpu_inference=is_gpu_inference)

  # Print #parameters and #FLOPs in model.
  compute_model_statistics(batch_size, is_training=is_training)

  if is_training:
    # Crop ground-truth masks to the selected foreground RoIs to form the
    # mask loss targets.
    mask_targets = training_ops.get_mask_targets(
        selected_box_rois, proposal_to_label_map, selected_box_targets,
        labels['cropped_gt_masks'], params['mrcnn_resolution'])
    model_outputs.update({
        'mask_outputs': mask_outputs,
        'mask_targets': mask_targets,
        'selected_class_targets': selected_class_targets,
    })
  else:
    # Mask head emits logits; expose probabilities at inference.
    model_outputs.update({
        'detection_masks': tf.nn.sigmoid(mask_outputs),
    })

  return model_outputs