in timesformer/datasets/transform.py [0:0]
def uniform_crop_2crops(images, size, spatial_idx, boxes=None):
    """
    Take one of two uniform square crops along the longer side of the images,
    and crop the corresponding boxes accordingly.

    The shorter side is always cropped starting at offset 0. Along the longer
    side (width is used when height == width):
      * if that side is larger than `2 * size`, the two crops sit side by side
        around its center;
      * otherwise crop 0 is flush with the start (left / top) and crop 1 is
        flush with the end (right / bottom), so the crops may overlap.

    Args:
        images (tensor): images to perform uniform crop. The dimension is
            `num frames` x `channel` x `height` x `width`.
        size (int): size of height and width to crop the images.
            NOTE(review): presumably expected not to exceed the longer side;
            a larger value yields a negative offset for crop 1 - confirm
            against callers.
        spatial_idx (int): 0 or 1, selecting the first (left / top) or the
            second (right / bottom) crop.
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
    Returns:
        cropped (tensor): images with dimension of
            `num frames` x `channel` x `size` x `size`.
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    Raises:
        ValueError: if `spatial_idx` is not 0 or 1.
    """
    # Only two crops exist. Index 2 used to pass validation and then crash
    # with UnboundLocalError because no branch assigned an offset for it.
    if spatial_idx not in (0, 1):
        raise ValueError(
            "spatial_idx must be 0 or 1 for a 2-crop, got {!r}".format(
                spatial_idx
            )
        )

    height = images.shape[2]
    width = images.shape[3]

    # The crop position only varies along the longer axis.
    is_tall = height > width
    long_side = height if is_tall else width

    if long_side > size * 2:
        # Room for two non-overlapping crops: centre the pair on the axis.
        margin = (long_side - size * 2) // 2
        if spatial_idx == 0:
            offset = int(margin)
        else:
            offset = int(long_side - size - margin)
    else:
        # Not enough room: pin the crops to the two ends of the axis.
        offset = 0 if spatial_idx == 0 else long_side - size

    if is_tall:
        y_offset, x_offset = offset, 0
    else:
        y_offset, x_offset = 0, offset

    cropped = images[
        :, :, y_offset : y_offset + size, x_offset : x_offset + size
    ]
    # crop_boxes is not defined in this block; it is expected to be provided
    # elsewhere in the module.
    cropped_boxes = (
        crop_boxes(boxes, x_offset, y_offset) if boxes is not None else None
    )

    return cropped, cropped_boxes