in scripts/tf_cnn_benchmarks/ssd_dataloader.py [0:0]
def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape):
  """Samples a random crop window and decodes only that part of the JPEG.

  Candidate windows are drawn in batches of `ssd_constants.NUM_CROP_PASSES`
  until one is accepted. A candidate is accepted when all of these hold:
    1. its height/width and width/height ratios are both below 2;
    2. its IoU with every box in `boxes` exceeds a threshold picked at random
       from `ssd_constants.CROP_MIN_IOU_CHOICES`;
    3. the center of at least one box lies strictly inside it.
  In each sampling round, with probability `ssd_constants.P_NO_CROP_PER_PASS`,
  the full-image window [0, 0, 1, 1] is used instead and every box is kept.

  JPEG decoding is deferred until the window is known, so that only the
  cropped region has to be decoded.

  Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation

  Args:
    image_buffer: Tensor tf.string containing the contents of a JPEG file.
    boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates of
      object bounding boxes. The code treats them as normalized to [0, 1].
    classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels
      of objects.
    raw_shape: [height, width, 3].

  Returns:
    resized_image: decoded, cropped, and resized image Tensor tf.float32 of
      shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value
      range 0--255.
    cropped_boxes: boxes whose center falls inside the chosen window, clipped
      to the window and re-expressed relative to it.
    cropped_classes: the entries of `classes` that belong to those boxes.
  """
  num_boxes = tf.shape(boxes)[0]

  def no_crop_check():
    """Returns a scalar bool: True when this round should skip cropping."""
    draw = tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
    return draw < ssd_constants.P_NO_CROP_PER_PASS

  def no_crop_proposal():
    """Returns an accepted full-image window together with an all-True mask."""
    accepted = tf.ones((), tf.bool)
    full_window = tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32)
    keep_every_box = tf.ones((num_boxes,), tf.bool)
    return accepted, full_window, keep_every_box

  def crop_proposal():
    """Samples NUM_CROP_PASSES windows and picks the last valid one, if any.

    Returns:
      A tuple (use_crop, output_ltrb, output_masks). `use_crop` is False when
      no candidate passed the checks; in that case the other two are all
      zeros / all False.
    """
    num_passes = ssd_constants.NUM_CROP_PASSES
    per_box = (1, num_boxes)

    def rand_vec(minval, maxval):
      # One uniform sample per candidate, as a [num_passes, 1] column.
      return tf.random_uniform(
          shape=(num_passes, 1), minval=minval, maxval=maxval,
          dtype=tf.float32)

    def tiled_center(offset):
      # Midpoint of coordinates `offset` and `offset + 2` of every box,
      # repeated for each candidate: shape [num_passes, num_boxes].
      summed = boxes[:, offset + 0] + boxes[:, offset + 2]
      return tf.tile(0.5 * summed[tf.newaxis, :], (num_passes, 1))

    # Window extents in [0.3, 1), placed so the window stays inside [0, 1].
    width = rand_vec(0.3, 1)
    height = rand_vec(0.3, 1)
    left = rand_vec(0, 1 - width)
    top = rand_vec(0, 1 - height)
    right = left + width
    bottom = top + height
    ltrb = tf.concat([left, top, right, bottom], axis=1)

    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
    ious = calc_iou_tensor(ltrb, boxes)

    # masks[p, b] is True when the center of box b lies strictly inside
    # candidate p; boxes centered outside the window get discarded later.
    xc = tiled_center(0)
    yc = tiled_center(1)
    center_tests = [
        tf.greater(xc, tf.tile(left, per_box)),
        tf.less(xc, tf.tile(right, per_box)),
        tf.greater(yc, tf.tile(top, per_box)),
        tf.less(yc, tf.tile(bottom, per_box)),
    ]
    masks = tf.reduce_all(tf.stack(center_tests, axis=2), axis=2)

    # Per-candidate validity checks, each of shape [num_passes, 1].
    not_too_tall = tf.less(height / width, 2)
    not_too_wide = tf.less(width / height, 2)
    valid_aspect = tf.logical_and(not_too_tall, not_too_wide)
    above_threshold = tf.greater(ious, min_iou)
    valid_ious = tf.reduce_all(above_threshold, axis=1, keepdims=True)
    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)
    all_checks = tf.concat([valid_aspect, valid_ious, valid_masks], axis=1)
    valid_all = tf.cast(tf.reduce_all(all_checks, axis=1), tf.int32)

    # Candidates are numbered from one so that zero can mean "no match".
    index = tf.range(1, 1 + num_passes, dtype=tf.int32)
    highest_valid = tf.reduce_max(index * valid_all)
    # One-hot at the highest-numbered valid candidate, or all False when
    # there is none.
    selection = tf.equal(highest_valid, index)
    use_crop = tf.reduce_any(selection)

    # Pull out the selected row of `ltrb` and of `masks` via masking.
    row_weights = tf.tile(
        tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))
    output_ltrb = tf.reduce_sum(tf.multiply(ltrb, row_weights), axis=0)
    row_picker = tf.tile(selection[:, tf.newaxis], per_box)
    output_masks = tf.reduce_any(tf.logical_and(masks, row_picker), axis=0)
    return use_crop, output_ltrb, output_masks

  def proposal(*unused_loop_vars):
    """Loop body: runs one sampling round, ignoring the previous state."""
    return tf.cond(
        pred=no_crop_check(),
        true_fn=no_crop_proposal,
        false_fn=crop_proposal,
    )

  # Keep proposing until a round reports an accepted window. The initial
  # "accepted" flag is False, so the body runs at least once.
  initial_state = [
      tf.zeros((), tf.bool),
      tf.zeros((4,), tf.float32),
      tf.zeros((num_boxes,), tf.bool),
  ]
  _, crop_bounds, box_masks = tf.while_loop(
      cond=lambda accepted, *_: tf.logical_not(accepted),
      body=proposal,
      loop_vars=initial_state,
  )

  kept_boxes = tf.boolean_mask(boxes, box_masks, axis=0)
  mlperf.logger.log(key=mlperf.tags.NUM_CROPPING_ITERATIONS,
                    value=ssd_constants.NUM_CROP_PASSES)

  # Clip the surviving boxes to the cropped region.
  clipped_boxes = tf.stack([
      tf.maximum(kept_boxes[:, 0], crop_bounds[0]),
      tf.maximum(kept_boxes[:, 1], crop_bounds[1]),
      tf.minimum(kept_boxes[:, 2], crop_bounds[2]),
      tf.minimum(kept_boxes[:, 3], crop_bounds[3]),
  ], axis=1)

  win_left = crop_bounds[0]
  win_top = crop_bounds[1]
  win_width = crop_bounds[2] - win_left
  win_height = crop_bounds[3] - win_top

  # Re-express the clipped boxes relative to the window.
  cropped_boxes = tf.stack([
      (clipped_boxes[:, 0] - win_left) / win_width,
      (clipped_boxes[:, 1] - win_top) / win_height,
      (clipped_boxes[:, 2] - win_left) / win_width,
      (clipped_boxes[:, 3] - win_top) / win_height,
  ], axis=1)

  # crop_window holds integer coordinates of the cropped region. A normalized
  # coordinate value of y should be mapped to the image coordinate at
  # y * (height - 1).
  # NOTE(review): tf.image.decode_and_crop_jpeg expects
  # [crop_y, crop_x, crop_height, crop_width]. Here the first box coordinate
  # ("left") is scaled by raw_shape[0] (height), which is consistent only if
  # `boxes` are stored as [ymin, xmin, ymax, xmax] -- confirm with the caller.
  image_dims = tf.cast(raw_shape, tf.float32)
  crop_window = tf.stack([win_left * (image_dims[0] - 1),
                          win_top * (image_dims[1] - 1),
                          win_width * image_dims[0],
                          win_height * image_dims[1]])
  crop_window = tf.cast(crop_window, tf.int32)

  # The fused op decodes only the cropped portion of the image.
  cropped_image = tf.image.decode_and_crop_jpeg(
      image_buffer, crop_window, channels=3)

  # Resizing converts the image from uint8 to float32 without rescaling values.
  target_size = [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE]
  resized_image = tf.image.resize_images(cropped_image, target_size)
  mlperf.logger.log(key=mlperf.tags.INPUT_SIZE,
                    value=ssd_constants.IMAGE_SIZE)

  cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)
  return resized_image, cropped_boxes, cropped_classes