in sagemaker/src/word_and_line_segmentation.py [0:0]
def transform(image, bbox, text):
    '''
    Convert an image and its bounding boxes into the tensors fed to a CNN.

    The image is given a trailing channel axis, resized with ``resize_short``
    to the module-level ``image_size``, moved to channel-first layout and
    divided by 255. Every bounding box is enlarged about its centre by the
    module-level ``expand_bb_scale``, and the boxes are zero padded to a
    fixed number of rows (``max_label_n``).

    Parameters
    ----------
    image : numpy.ndarray
        2-D image array; a channel axis is appended at axis 2.
        NOTE(review): the division by 255 suggests 8-bit pixel values --
        confirm against the caller.
    bbox : numpy.ndarray
        Array of shape (N, 4). Columns 0 and 1 are treated as the box origin
        and columns 2 and 3 as its width and height.
    text : object
        Not used by this function.

    Returns
    -------
    image : mx.nd.NDArray
        The resized image, channel axis first, scaled by 1/255.
    label_padded : mx.nd.NDArray
        Array with ``max_label_n`` rows and 5 columns. For each of the N
        input boxes, column 0 is 1 and columns 1-4 hold the expanded box;
        all remaining rows are zero.

    Raises
    ------
    ValueError
        If ``bbox`` has more than ``max_label_n`` rows.
    '''
    max_label_n = 128

    # Resize the image
    image = np.expand_dims(image, axis=2)
    image = mx.nd.array(image)
    image = resize_short(image, image_size)
    image = image.transpose([2, 0, 1]) / 255.

    # Expand the bounding box by expand_bb_scale.
    # The arithmetic must happen in floating point: assigning the fractional
    # results back into an integer array would silently truncate them and
    # lose (part of) the expansion. Floating inputs keep their own dtype.
    bb = bbox.copy()
    if not np.issubdtype(bb.dtype, np.floating):
        bb = bb.astype(np.float64)
    new_w = (1 + expand_bb_scale) * bb[:, 2]
    new_h = (1 + expand_bb_scale) * bb[:, 3]
    # Shift the origin by half of the size increase so the centre stays put.
    bb[:, 0] = bb[:, 0] - (new_w - bb[:, 2]) / 2
    bb[:, 1] = bb[:, 1] - (new_h - bb[:, 3]) / 2
    bb[:, 2] = new_w
    bb[:, 3] = new_h
    bbox = bb.astype(np.float32)

    # Zero pad the data
    label_n = bbox.shape[0]
    if label_n > max_label_n:
        # Fail with an explicit message instead of an opaque broadcast error.
        raise ValueError(
            "transform received %d bounding boxes but can pad to at most %d"
            % (label_n, max_label_n))
    label_padded = np.zeros(shape=(max_label_n, 5))
    label_padded[:label_n, 1:] = bbox
    # Column 0 flags the rows that hold a real box; padded rows stay 0.
    label_padded[:label_n, 0] = 1
    label_padded = mx.nd.array(label_padded)
    return image, label_padded