in slowfast/datasets/ava_dataset.py [0:0]
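# NOTE: this is an excerpt of one method of the AVA dataset class; the
# surrounding module is assumed (consistent with the PySlowFast layout)
# to provide:
#   import numpy as np
#   import torch
#   from slowfast.datasets import cv2_transform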
def _images_and_boxes_preprocessing_cv2(self, imgs, boxes):
    """
    Perform preprocessing on the input images and the corresponding
    boxes for one clip, with OpenCV as the backend.

    Args:
        imgs (list): the images of the clip, each as an HWC BGR ndarray.
        boxes (ndarray): the boxes for the current clip, in normalized
            [0, 1] coordinates.

    Returns:
        imgs (tensor): preprocessed images, as a C x T x H x W tensor.
        boxes (ndarray): preprocessed boxes, in pixel coordinates.
    """
    height, width, _ = imgs[0].shape

    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height
    boxes = cv2_transform.clip_boxes_to_image(boxes, height, width)

    # The helpers in `transform.py` operate on lists of np.array; for
    # AVA there is only one np.array per clip, so wrap it in a list.
    boxes = [boxes]
    # The images are in HWC, BGR format at this point.
    if self._split == "train":
        imgs, boxes = cv2_transform.random_short_side_scale_jitter_list(
            imgs,
            min_size=self._jitter_min_scale,
            max_size=self._jitter_max_scale,
            boxes=boxes,
        )
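        # Crop a _crop_size x _crop_size window at a random location;
        # the box coordinates are shifted into the crop's frame.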
        imgs, boxes = cv2_transform.random_crop_list(
            imgs, self._crop_size, order="HWC", boxes=boxes
        )

        if self.random_horizontal_flip:
            # Random horizontal flip with probability 0.5.
            imgs, boxes = cv2_transform.horizontal_flip_list(
                0.5, imgs, order="HWC", boxes=boxes
            )
    elif self._split == "val":
        # Scale the short side to the crop size. Non-local and STRG use 256.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(
                self._crop_size, boxes[0], height, width
            )
        ]
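        # spatial_idx=1 selects the center crop (0 and 2 would take the
        # left/top and right/bottom crops, respectively).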
        imgs, boxes = cv2_transform.spatial_shift_crop_list(
            self._crop_size, imgs, 1, boxes=boxes
        )

        if self._test_force_flip:
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    elif self._split == "test":
        # Scale the short side to the crop size. Non-local and STRG use 256.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(
                self._crop_size, boxes[0], height, width
            )
        ]

        if self._test_force_flip:
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    else:
        raise NotImplementedError(
            "Unsupported split mode {}".format(self._split)
        )
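    # Geometric transforms are done; every frame is still an HWC BGR
    # image. The remaining steps only change layout and color statistics.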
    # Convert each image from HWC to CHW, keeping the BGR channel order.
    imgs = [cv2_transform.HWC2CHW(img) for img in imgs]

    # Rescale pixel values from [0, 255] to [0, 1].
    imgs = [img / 255.0 for img in imgs]

    imgs = [
        np.ascontiguousarray(
            img.reshape((3, imgs[0].shape[1], imgs[0].shape[2]))
        ).astype(np.float32)
        for img in imgs
    ]
    # Color augmentation (applied after scaling to [0, 1]).
    if self._split == "train" and self._use_color_augmentation:
        if not self._pca_jitter_only:
            imgs = cv2_transform.color_jitter_list(
                imgs,
                img_brightness=0.4,
                img_contrast=0.4,
                img_saturation=0.4,
            )
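        # AlexNet-style PCA lighting jitter; the eigenvalues and
        # eigenvectors are taken from the dataset configuration.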
        imgs = cv2_transform.lighting_list(
            imgs,
            alphastd=0.1,
            eigval=np.array(self._pca_eigval).astype(np.float32),
            eigvec=np.array(self._pca_eigvec).astype(np.float32),
        )
    # Normalize images by mean and std.
    imgs = [
        cv2_transform.color_normalization(
            img,
            np.array(self._data_mean, dtype=np.float32),
            np.array(self._data_std, dtype=np.float32),
        )
        for img in imgs
    ]

    # Concatenate the list of CHW images into a single C x T x H x W ndarray.
    imgs = np.concatenate(
        [np.expand_dims(img, axis=1) for img in imgs], axis=1
    )
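    # With channels as the leading axis, reversing axis 0 swaps the
    # channel order without touching the time or spatial axes.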
    if not self._use_bgr:
        # Convert the image format from BGR to RGB.
        imgs = imgs[::-1, ...]

    imgs = np.ascontiguousarray(imgs)
    imgs = torch.from_numpy(imgs)

    # imgs is C x T x H x W, so imgs[0] is T x H x W: clip the boxes to
    # the final crop height and width.
    boxes = cv2_transform.clip_boxes_to_image(
        boxes[0], imgs[0].shape[1], imgs[0].shape[2]
    )
    return imgs, boxes
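
# A minimal sketch of the expected contract (hypothetical shapes, not
# from the repo): for a clip of 8 frames at 224x224 with N person boxes,
#   imgs:  list of 8 HWC BGR ndarrays      -> torch.Tensor (3, 8, 224, 224)
#   boxes: ndarray (N, 4), normalized xyxy -> ndarray (N, 4), pixel xyxy,
#          clipped to the final crop.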