def dataset_parser()

in tensorflow/sagemakercv/data/coco/dataloader_utils.py [0:0]
107 lines of code
10 McCabe index (conditional complexity)

def dataset_parser(value, mode, params, use_instance_mask, seed=None, regenerate_source_id=False):
    """Parse data to a fixed dimension input image and learning targets.

    Args:
    value: A dictionary contains an image and groundtruth annotations.

    Returns:
    features: a dictionary that contains the image and auxiliary
      information. The following describes {key: value} pairs in the
      dictionary.
      image: Image tensor that is preproessed to have normalized value and
        fixed dimension [image_size, image_size, 3]
      image_info: image information that includes the original height and
        width, the scale of the proccessed image to the original image, and
        the scaled height and width.
      source_ids: Source image id. Default value -1 if the source id is
        empty in the groundtruth annotation.
    labels: a dictionary that contains auxiliary information plus (optional)
      labels. The following describes {key: value} pairs in the dictionary.
      `labels` is only for training.
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of objectiveness score at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      gt_boxes: Groundtruth bounding box annotations. The box is represented
         in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the
         fixed dimension [MAX_NUM_INSTANCES, 4].
      gt_classes: Groundtruth classes annotations. The tennsor is padded
        with -1 to the fixed dimension [MAX_NUM_INSTANCES].
      cropped_gt_masks: groundtrugh masks cropped by the bounding box and
        resized to a fixed size determined by params['gt_mask_size']
      regenerate_source_id: `bool`, if True TFExampleParser will use hashed
        value of `image/encoded` for `image/source_id`.
    """
    if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
        raise ValueError("Unknown execution mode received: %s" % mode)

    def create_example_decoder():
        return TfExampleDecoder(
            use_instance_mask=use_instance_mask,
            regenerate_source_id=regenerate_source_id
    )

    example_decoder = create_example_decoder()

    with tf.xla.experimental.jit_scope(compile_ops=True):

        with tf.name_scope('parser'):

            data = example_decoder.decode(value)

            data['groundtruth_is_crowd'] = process_groundtruth_is_crowd(data)

            image = tf.image.convert_image_dtype(data['image'], dtype=tf.float32)

            source_id = process_source_id(data['source_id'])

            if mode in [tf.estimator.ModeKeys.PREDICT,
                        tf.estimator.ModeKeys.EVAL]:

                features = {
                    'source_ids': source_id,
                }

                if params['visualize_images_summary']:
                    features['orig_images'] = tf.image.resize(image, params['image_size'])

                features["images"], features["image_info"], _, _ = preprocess_image(
                    image,
                    boxes=None,
                    instance_masks=None,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=False,
                    seed=seed
                )

                if params['include_groundtruth_in_features']:
                    labels = prepare_labels_for_eval(
                        data,
                        target_num_instances=MAX_NUM_INSTANCES,
                        target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
                        use_instance_mask=use_instance_mask
                    )
                    return {'features': features, 'labels': labels}

                else:
                    return {'features': features}

            elif mode == tf.estimator.ModeKeys.TRAIN:

                labels = {}
                features = {
                    'source_ids': source_id
                }

                boxes, classes, indices, instance_masks = process_boxes_classes_indices_for_training(
                    data,
                    skip_crowd_during_training=params['skip_crowd_during_training'],
                    use_category=params['use_category'],
                    use_instance_mask=use_instance_mask,
                )
                
                orig_image_size = tf.shape(image)[:2]

                image, image_info, boxes, instance_masks = preprocess_image(
                    image,
                    boxes=boxes,
                    instance_masks=instance_masks,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=params['augment_input_data'],
                    seed=seed
                )

                features.update({
                    'images': image,
                    'image_info': image_info,
                })

                padded_image_size = image.get_shape().as_list()[:2]

                # Pads cropped_gt_masks.
                if use_instance_mask:
                    instance_masks = tf.expand_dims(instance_masks, -1)
                    orig_boxes = boxes * image_info[2]
                    labels['cropped_gt_masks'] = process_gt_masks_for_training(
                            instance_masks,
                            orig_boxes,
                            gt_mask_size=params['gt_mask_size'],
                            padded_image_size=orig_image_size,
                            max_num_instances=MAX_NUM_INSTANCES
                        )

                with tf.xla.experimental.jit_scope(compile_ops=False):
                    # Assign anchors.
                    (score_targets, box_targets), input_anchor = process_targets_for_training(
                        padded_image_size=padded_image_size,
                        boxes=boxes,
                        classes=classes,
                        params=params
                    )

                additional_labels = process_labels_for_training(
                    image_info, boxes, classes, score_targets, box_targets,
                    max_num_instances=MAX_NUM_INSTANCES,
                    min_level=params["min_level"],
                    max_level=params["max_level"]
                )

                labels.update(additional_labels)
                # labels["input_anchor"] = input_anchor

                # Features
                # {
                #   'source_ids': <tf.Tensor 'parser/StringToNumber:0' shape=() dtype=float32>,
                #   'images': <tf.Tensor 'parser/pad_to_bounding_box/Squeeze:0' shape=(1024, 1024, 3) dtype=float32>,
                #   'image_info': <tf.Tensor 'parser/stack_1:0' shape=(5,) dtype=float32>
                # }

                FAKE_FEATURES = False

                if FAKE_FEATURES:
                    labels["source_ids"] = tf.ones(shape=(), dtype=tf.float32)
                    labels["images"] = tf.ones(shape=(1024, 1024, 3), dtype=tf.float32)
                    labels["image_info"] = tf.ones(shape=(5,), dtype=tf.float32)

                # Labels
                # {
                #   'cropped_gt_masks': <tf.Tensor 'parser/Reshape_4:0' shape=(100, 116, 116) dtype=float32>,
                #   'score_targets_2': <tf.Tensor 'parser/Reshape_9:0' shape=(256, 256, 3) dtype=int32>,
                #   'box_targets_2': <tf.Tensor 'parser/Reshape_14:0' shape=(256, 256, 12) dtype=float32>,
                #   'score_targets_3': <tf.Tensor 'parser/Reshape_10:0' shape=(128, 128, 3) dtype=int32>,
                #   'box_targets_3': <tf.Tensor 'parser/Reshape_15:0' shape=(128, 128, 12) dtype=float32>,
                #   'score_targets_4': <tf.Tensor 'parser/Reshape_11:0' shape=(64, 64, 3) dtype=int32>,
                #   'box_targets_4': <tf.Tensor 'parser/Reshape_16:0' shape=(64, 64, 12) dtype=float32>,
                #   'score_targets_5': <tf.Tensor 'parser/Reshape_12:0' shape=(32, 32, 3) dtype=int32>,
                #   'box_targets_5': <tf.Tensor 'parser/Reshape_17:0' shape=(32, 32, 12) dtype=float32>,
                #   'score_targets_6': <tf.Tensor 'parser/Reshape_13:0' shape=(16, 16, 3) dtype=int32>,
                #   'box_targets_6': <tf.Tensor 'parser/Reshape_18:0' shape=(16, 16, 12) dtype=float32>,
                #   'gt_boxes': <tf.Tensor 'parser/Reshape_20:0' shape=(100, 4) dtype=float32>,
                #   'gt_classes': <tf.Tensor 'parser/Reshape_22:0' shape=(100, 1) dtype=float32>
                # }

                FAKE_LABELS = False

                if FAKE_LABELS:
                    labels["cropped_gt_masks"] = tf.ones(shape=(100, 116, 116), dtype=tf.float32)
                    labels["gt_boxes"] = tf.ones(shape=(100, 4), dtype=tf.float32)
                    labels["gt_classes"] = tf.ones(shape=(100, 1), dtype=tf.float32)

                    idx = 1
                    for dim in [256, 128, 64, 32, 16]:
                        idx += 1  # Starts at 2

                        labels["score_targets_%d" % idx] = tf.ones(shape=(dim, dim, 3), dtype=tf.float32)
                        labels["box_targets_%d" % idx] = tf.ones(shape=(dim, dim, 12), dtype=tf.float32)

                return features, labels