def transform()

in sagemaker/src/word_and_line_segmentation.py [0:0]


def transform(image, bbox, text):
    '''
    Convert an image and its bounding boxes into the tensors fed to a CNN.

    The image is given an extra axis at position 2, wrapped in an MXNet
    NDArray, resized with ``resize_short`` to the module-level ``image_size``,
    has its axes reordered to (2, 0, 1) and is divided by 255.

    Every bounding box has columns 2 and 3 scaled by
    ``1 + expand_bb_scale`` and columns 0 and 1 shifted back by half of that
    growth (presumably an (x, y, w, h) layout expanded about its centre --
    confirm against the caller). The boxes are then cast to float32 and
    written into a zero-padded ``(max_label_n, 5)`` array: column 0 is 1 for
    rows that hold a box and 0 for padding rows, columns 1-4 hold the box.

    Parameters
    ----------
    image: array-like
        Input accepted by ``np.expand_dims(image, axis=2)``; presumably a
        2-D grayscale image -- confirm against the caller.
    bbox: array of shape (n, 4)
        Bounding boxes, one per row. The input is not modified; a copy is
        expanded.
    text:
        Unused by this function; kept so the signature stays compatible
        with existing callers.

    Returns
    -------
    (image, label_padded)
        Both as MXNet NDArrays.

    Raises
    ------
    ValueError
        If ``bbox`` holds more than ``max_label_n`` (128) rows.
    '''

    max_label_n = 128

    # Validate before doing any image work: previously too many boxes only
    # surfaced later as an opaque NumPy broadcasting ValueError.
    label_n = bbox.shape[0]
    if label_n > max_label_n:
        raise ValueError(
            "transform() received {} bounding boxes but can pad to at most {}"
            .format(label_n, max_label_n))

    # Resize the image
    image = np.expand_dims(image, axis=2)
    image = mx.nd.array(image)
    image = resize_short(image, image_size)
    image = image.transpose([2, 0, 1]) / 255.

    # Expand the bounding box by expand_bb_scale
    bb = bbox.copy()
    new_w = (1 + expand_bb_scale) * bb[:, 2]
    new_h = (1 + expand_bb_scale) * bb[:, 3]

    # Plain assignment (not ``-=``) on purpose: it keeps the original casting
    # behaviour when ``bbox`` has an integer dtype.
    bb[:, 0] = bb[:, 0] - (new_w - bb[:, 2]) / 2
    bb[:, 1] = bb[:, 1] - (new_h - bb[:, 3]) / 2
    bb[:, 2] = new_w
    bb[:, 3] = new_h
    bb = bb.astype(np.float32)

    # Zero pad the data; column 0 marks which rows carry a real box.
    label_padded = np.zeros(shape=(max_label_n, 5))
    label_padded[:label_n, 1:] = bb
    label_padded[:label_n, 0] = 1
    label_padded = mx.nd.array(label_padded)
    return image, label_padded