in datasets/video_transforms.py [0:0]
def random_short_side_scale_jitter(images, min_size, max_size, boxes=None):
"""
Perform a spatial short scale jittering on the given images and
corresponding boxes.
Args:
images (tensor): images to perform scale jitter. Dimension is
`num frames` x `channel` x `height` x `width`.
min_size (int): the minimal size to scale the frames.
max_size (int): the maximal size to scale the frames.
boxes (ndarray): optional. Corresponding boxes to images.
Dimension is `num boxes` x 4.
Returns:
(tensor): the scaled images with dimension of
`num frames` x `channel` x `new height` x `new width`.
(ndarray or None): the scaled boxes with dimension of
`num boxes` x 4.
"""
size = int(round(np.random.uniform(min_size, max_size)))
height = images.shape[2]
width = images.shape[3]
if (width <= height and width == size) or (
height <= width and height == size
):
return images, boxes
new_width = size
new_height = size
if width < height:
new_height = int(math.floor((float(height) / width) * size))
if boxes is not None:
boxes = boxes * float(new_height) / height
else:
new_width = int(math.floor((float(width) / height) * size))
if boxes is not None:
boxes = boxes * float(new_width) / width
return (
torch.nn.functional.interpolate(
images,
size=(new_height, new_width),
mode="bilinear",
align_corners=False,
),
boxes,
)