def ssd_decode_and_crop()

in scripts/tf_cnn_benchmarks/ssd_dataloader.py


def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape):
  """Crop image randomly and decode the cropped region.

  This function will crop an image to meet the following requirements:
  1. height to width ratio between 0.5 and 2;
  2. IoU of every box with the cropped region exceeds a randomly chosen
     minimum threshold;
  3. At least one box center is in the cropped region.
  We defer the JPEG decoding until after the crop to avoid wasted work.

  Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation

  Args:
    image_buffer: Tensor tf.string containing the contents of a JPEG file.
    boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates of
      object bounding boxes.
    classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels
      of objects.
    raw_shape: [height, width, 3].

  Returns:
    resized_image: decoded, cropped, and resized image Tensor tf.float32 of
      shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value
      range 0--255.
    cropped_boxes: box coordinates for objects in the cropped region.
    cropped_classes: class labels for objects in the cropped region.
  """

  num_boxes = tf.shape(boxes)[0]

  def no_crop_check():
    return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
            < ssd_constants.P_NO_CROP_PER_PASS)

  def no_crop_proposal():
    return (
        tf.ones((), tf.bool),
        tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
        tf.ones((num_boxes,), tf.bool),
    )

  def crop_proposal():
    rand_vec = lambda minval, maxval: tf.random_uniform(
        shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
        dtype=tf.float32)

    width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
    left, top = rand_vec(0, 1-width), rand_vec(0, 1-height)

    right = left + width
    bottom = top + height

    ltrb = tf.concat([left, top, right, bottom], axis=1)

    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
    ious = calc_iou_tensor(ltrb, boxes)

    # Discard any boxes whose center is not inside the cropped region.
    xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                      (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)]

    masks = tf.reduce_all(tf.stack([
        tf.greater(xc, tf.tile(left, (1, num_boxes))),
        tf.less(xc, tf.tile(right, (1, num_boxes))),
        tf.greater(yc, tf.tile(top, (1, num_boxes))),
        tf.less(yc, tf.tile(bottom, (1, num_boxes))),
    ], axis=2), axis=2)

    # Check whether each candidate crop is valid.
    valid_aspect = tf.logical_and(tf.less(height/width, 2),
                                  tf.less(width/height, 2))
    valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True)
    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

    valid_all = tf.cast(tf.reduce_all(tf.concat(
        [valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32)

    # One indexed, as zero is needed for the case of no matches.
    index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

    # Either one-hot, or zeros if there is no valid crop.
    selection = tf.equal(tf.reduce_max(index * valid_all), index)

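    # Collapse the NUM_CROP_PASSES candidates down to a single selected crop
    # and its box mask (all zeros / all False if no candidate was valid).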
    use_crop = tf.reduce_any(selection)
    output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast(
        selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0)
    output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile(
        selection[:, tf.newaxis], (1, num_boxes))), axis=0)

    return use_crop, output_ltrb, output_masks

  def proposal(*args):
    return tf.cond(
        pred=no_crop_check(),
        true_fn=no_crop_proposal,
        false_fn=crop_proposal,
    )

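  # Rejection-sample a crop: each iteration of the loop either skips cropping
  # entirely (with probability P_NO_CROP_PER_PASS) or draws NUM_CROP_PASSES
  # candidate crops and keeps at most one valid candidate. The loop repeats
  # until an iteration succeeds.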
  _, crop_bounds, box_masks = tf.while_loop(
      cond=lambda x, *_: tf.logical_not(x),
      body=proposal,
      loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32),
                 tf.zeros((num_boxes,), tf.bool)],
  )

  filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)

  mlperf.logger.log(key=mlperf.tags.NUM_CROPPING_ITERATIONS,
                    value=ssd_constants.NUM_CROP_PASSES)

  # Clip boxes to the cropped region.
  filtered_boxes = tf.stack([
      tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
      tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
      tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
      tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
  ], axis=1)

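  # Re-express the clipped boxes in normalized coordinates of the crop.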
  left = crop_bounds[0]
  top = crop_bounds[1]
  width = crop_bounds[2] - left
  height = crop_bounds[3] - top

  cropped_boxes = tf.stack([
      (filtered_boxes[:, 0] - left) / width,
      (filtered_boxes[:, 1] - top) / height,
      (filtered_boxes[:, 2] - left) / width,
      (filtered_boxes[:, 3] - top) / height,
  ], axis=1)

  # crop_window holds the integer pixel coordinates of the cropped region in
  # the [y, x, height, width] order expected by decode_and_crop_jpeg. A
  # normalized coordinate y maps to the image coordinate y * (height - 1).
  raw_shape = tf.cast(raw_shape, tf.float32)
  crop_window = tf.stack([top * (raw_shape[0] - 1),
                          left * (raw_shape[1] - 1),
                          height * raw_shape[0],
                          width * raw_shape[1]])
  crop_window = tf.cast(crop_window, tf.int32)

  # Fused op only decodes the cropped portion of an image
  cropped_image = tf.image.decode_and_crop_jpeg(
      image_buffer, crop_window, channels=3)

  # Resize converts image dtype from uint8 to float32, without rescaling values.
  resized_image = tf.image.resize_images(
      cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE])
  mlperf.logger.log(key=mlperf.tags.INPUT_SIZE,
                    value=ssd_constants.IMAGE_SIZE)

  cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)

  return resized_image, cropped_boxes, cropped_classes
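
Below is a minimal usage sketch, not part of the file above, showing how
ssd_decode_and_crop might be wired into a tf.data input pipeline. The
parse_example() helper, the file pattern, and the map/prefetch wiring are
illustrative assumptions, not the repo's actual loader.

import tensorflow as tf

def example_input_fn(file_pattern):
  """Illustrative only. parse_example() is a hypothetical helper that returns
  (image_buffer, boxes, classes, raw_shape) from a serialized tf.Example."""
  files = tf.data.Dataset.list_files(file_pattern)
  dataset = tf.data.TFRecordDataset(files)

  def _map_fn(serialized):
    image_buffer, boxes, classes, raw_shape = parse_example(serialized)
    image, boxes, classes = ssd_decode_and_crop(
        image_buffer, boxes, classes, raw_shape)
    # Each example yields a variable number of boxes, so batching would need
    # padding or anchor matching downstream; that step is omitted here.
    return image, boxes, classes

  return dataset.map(_map_fn, num_parallel_calls=4).prefetch(1)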