slowfast/datasets/epickitchens.py [149:201]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            # RandAugment parameters: translation magnitude scales with the
            # (minimum) image size; the fill color is the dataset mean,
            # rescaled from [0, 1] floats to [0, 255] ints and clamped.
            aa_params = dict(
                translate_const=int(img_size_min * 0.45),
                img_mean=tuple([min(255, round(255 * x)) for x in self.cfg.DATA.MEAN]),
            )
            # A single seed is shared by every frame's transform — presumably
            # so the same augmentation is applied consistently across time;
            # depends on rand_augment_transform honoring the seed (TODO confirm).
            seed = random.randint(0, 100000000)
            frames = [autoaugment.rand_augment_transform(
                auto_augment_desc, aa_params, seed)(frame) for frame in frames]

            # Convert augmented frames (image objects) back into one stacked
            # tensor of shape T H W C.
            frames = [torch.tensor(np.array(frame)) for frame in frames]
            frames = torch.stack(frames)
        
        # Color normalization with the configured per-channel mean/std.
        frames = utils.tensor_normalize(
            frames, self.cfg.DATA.MEAN, self.cfg.DATA.STD
        )

        # T H W C -> C T H W.
        frames = frames.permute(3, 0, 1, 2)

        # Spatial augmentation, operating on the C T H W tensor.
        use_random_resize_crop = self.cfg.DATA.USE_RANDOM_RESIZE_CROPS
        if use_random_resize_crop:
            if self.mode in ["train", "val"]:
                # Train/val: random resized crop plus a 50% horizontal flip.
                frames = transform.random_resize_crop_video(frames, crop_size, interpolation_mode="bilinear")
                frames, _ = transform.horizontal_flip(0.5, frames)
            else:
                # Test: min_scale, max_scale and crop_size must all agree, so
                # the "jitter" below is a deterministic short-side resize,
                # followed by a crop at the requested spatial position.
                assert len({min_scale, max_scale, crop_size}) == 1
                frames, _ = transform.random_short_side_scale_jitter(
                    frames, min_scale, max_scale
                )
                frames, _ = transform.uniform_crop(frames, crop_size, spatial_sample_index)
        else:
            # Fall back to the shared spatial sampling helper (scale jitter,
            # crop, optional flip — all driven by the config flags below).
            frames = utils.spatial_sampling(
                frames,
                spatial_idx=spatial_sample_index,
                min_scale=min_scale,
                max_scale=max_scale,
                crop_size=crop_size,
                random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
                inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
            )
        
        # C T H W -> T C H W so per-frame augmentations can be applied.
        # NOTE(review): only train/val get this permute — the test path leaves
        # frames as C T H W; verify downstream consumers expect that layout.
        if self.mode in ["train", "val"]:
            frames = frames.permute(1, 0, 2, 3) # C T H W -> T C H W
            frames = utils.frames_augmentation(
                frames,
                colorjitter=self.cfg.DATA.COLORJITTER,
                use_grayscale=self.cfg.DATA.GRAYSCALE,
                use_gaussian=self.cfg.DATA.GAUSSIAN
            )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



slowfast/datasets/kinetics.py [268:319]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                # RandAugment parameters: translation magnitude scales with
                # the (minimum) image size; the fill color is the dataset
                # mean, rescaled from [0, 1] floats to [0, 255] ints and clamped.
                aa_params = dict(
                    translate_const=int(img_size_min * 0.45),
                    img_mean=tuple([min(255, round(255 * x)) for x in self.cfg.DATA.MEAN]),
                )
                # A single seed is shared by every frame's transform —
                # presumably so the same augmentation is applied consistently
                # across time; depends on rand_augment_transform honoring the
                # seed (TODO confirm).
                seed = random.randint(0, 100000000)
                frames = [autoaugment.rand_augment_transform(
                    auto_augment_desc, aa_params, seed)(frame) for frame in frames]

                # Convert augmented frames (image objects) back into one
                # stacked tensor of shape T H W C.
                frames = [torch.tensor(np.array(frame)) for frame in frames]
                frames = torch.stack(frames)

            # Color normalization with the configured per-channel mean/std.
            frames = utils.tensor_normalize(
                frames, self.cfg.DATA.MEAN, self.cfg.DATA.STD
            )

            # T H W C -> C T H W.
            frames = frames.permute(3, 0, 1, 2)

            # Spatial augmentation, operating on the C T H W tensor.
            use_random_resize_crop = self.cfg.DATA.USE_RANDOM_RESIZE_CROPS
            if use_random_resize_crop:
                if self.mode in ["train", "val"]:
                    # Train/val: random resized crop plus a 50% horizontal flip.
                    frames = transform.random_resize_crop_video(frames, crop_size, interpolation_mode="bilinear")
                    frames, _ = transform.horizontal_flip(0.5, frames)
                else:
                    # Test: min_scale, max_scale and crop_size must all agree,
                    # so the "jitter" below is a deterministic short-side
                    # resize, followed by a crop at the requested position.
                    assert len({min_scale, max_scale, crop_size}) == 1
                    frames, _ = transform.random_short_side_scale_jitter(
                        frames, min_scale, max_scale
                    )
                    frames, _ = transform.uniform_crop(frames, crop_size, spatial_sample_index)
            else:
                # Fall back to the shared spatial sampling helper (scale
                # jitter, crop, optional flip — driven by the config flags).
                frames = utils.spatial_sampling(
                    frames,
                    spatial_idx=spatial_sample_index,
                    min_scale=min_scale,
                    max_scale=max_scale,
                    crop_size=crop_size,
                    random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
                    inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
                )

            # C T H W -> T C H W so per-frame augmentations can be applied.
            # NOTE(review): only train/val get this permute — the test path
            # leaves frames as C T H W; verify downstream consumers expect it.
            if self.mode in ["train", "val"]:
                frames = frames.permute(1, 0, 2, 3) # C T H W -> T C H W
                frames = utils.frames_augmentation(
                    frames,
                    colorjitter=self.cfg.DATA.COLORJITTER,
                    use_grayscale=self.cfg.DATA.GRAYSCALE,
                    use_gaussian=self.cfg.DATA.GAUSSIAN
                )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



