def __init__()

in src/transformers/data/datasets/kinetics_datasets.py [0:0]


    def __init__(self, mode='train', clip_len=8, frame_sample_rate=8, crop_size=224,
                 short_side=256, num_segment=1, num_crop=1, test_num_segment=10, test_num_crop=3,
                 video_dir='', label_dir=''):
        """Store the configuration, load the label file and build the transform.

        Every argument is saved on the instance under the same name. Only
        ``mode``, ``crop_size``, ``short_side`` and ``label_dir`` are used
        further inside this method.

        Args:
            mode: One of ``'train'``, ``'validation'`` or ``'test'``; selects
                which transform pipeline is built.
            clip_len: Stored as ``self.clip_len``; not used here.
            frame_sample_rate: Stored as ``self.frame_sample_rate``; not used
                here.
            crop_size: Side length of the square crop. In ``'test'`` mode it
                is also the size passed to the initial ``Resize``.
            short_side: Size passed to the initial ``Resize`` in ``'train'``
                and ``'validation'`` modes.
            num_segment: Stored as ``self.num_segment``; not used here.
            num_crop: Stored as ``self.num_crop``; not used here.
            test_num_segment: Stored as ``self.test_num_segment``; not used
                here.
            test_num_crop: Stored as ``self.test_num_crop``; not used here.
            video_dir: Stored as ``self.video_dir``; not used here.
            label_dir: Passed directly to ``pd.read_csv``. The file is read as
                space-delimited with no header row; column 0 becomes
                ``self.dataset_samples`` and column 2 becomes
                ``self.label_array``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        self.mode = mode
        self.clip_len = clip_len
        self.frame_sample_rate = frame_sample_rate
        self.crop_size = crop_size
        self.short_side = short_side
        self.num_segment = num_segment
        self.num_crop = num_crop
        self.test_num_segment = test_num_segment
        self.test_num_crop = test_num_crop
        self.video_dir = video_dir
        self.label_dir = label_dir

        # Fail fast, before touching the filesystem: an unsupported mode used
        # to be reported with print() only, leaving the instance without
        # dataset_samples / label_array / data_transform.
        if mode not in ('train', 'validation', 'test'):
            raise ValueError('Invalid mode. We only support train, validation and test.')

        # The label file is parsed the same way for every supported mode.
        cleaned = pd.read_csv(label_dir, header=None, delimiter=' ')
        self.dataset_samples = list(cleaned.values[:, 0])
        self.label_array = list(cleaned.values[:, 2])

        # Only the spatial part of the pipeline depends on the mode.
        if mode == 'train':
            spatial_steps = [
                video_transforms.Resize(self.short_side, interpolation='bilinear'),
                video_transforms.RandomResize(ratio=(1, 1.25), interpolation='bilinear'),
                video_transforms.RandomHorizontalFlip(),
                video_transforms.RandomCrop(size=(self.crop_size, self.crop_size)),
            ]
        elif mode == 'validation':
            spatial_steps = [
                video_transforms.Resize(self.short_side, interpolation='bilinear'),
                video_transforms.CenterCrop(size=(self.crop_size, self.crop_size)),
            ]
        else:  # mode == 'test'
            # Unlike the other modes, the test pipeline resizes to crop_size
            # (not short_side) before taking the three crops.
            spatial_steps = [
                video_transforms.Resize(self.crop_size, interpolation='bilinear'),
                video_transforms.VideoThreeCrop(size=(self.crop_size, self.crop_size)),
            ]

        # Tensor conversion and normalization close every pipeline.
        self.data_transform = video_transforms.Compose(spatial_steps + [
            volume_transforms.ClipToTensor(),
            video_transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN,
                                       std=IMAGENET_DEFAULT_STD),
        ])