in src/transformers/data/datasets/kinetics_datasets.py [0:0]
def __init__(self, mode='train', clip_len=8, frame_sample_rate=8, crop_size=224,
             short_side=256, num_segment=1, num_crop=1, test_num_segment=10, test_num_crop=3,
             video_dir='', label_dir=''):
    """Load the Kinetics annotation file and build the per-mode transform pipeline.

    Args:
        mode: One of ``'train'``, ``'validation'`` or ``'test'``; selects the
            augmentation pipeline (random crop/flip vs. center crop vs. three-crop).
        clip_len: Number of frames sampled per clip.
        frame_sample_rate: Temporal stride between sampled frames.
        crop_size: Spatial size of the final square crop.
        short_side: Target length of the shorter frame edge before cropping
            (used for train/validation resizing).
        num_segment: Temporal clips per video at train/validation time.
        num_crop: Spatial crops per clip at train/validation time.
        test_num_segment: Temporal clips per video at test time.
        test_num_crop: Spatial crops per clip at test time.
        video_dir: Root directory holding the video files.
        label_dir: Path to the space-separated annotation file; column 0 is the
            sample path, column 2 the integer label — TODO confirm column 1's role.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    if mode not in ('train', 'validation', 'test'):
        # Fail fast: the original code only printed here and returned a
        # half-initialized object (no dataset_samples / data_transform),
        # which would surface later as a confusing AttributeError.
        raise ValueError('Invalid mode. We only support train, validation and test.')

    self.mode = mode
    self.clip_len = clip_len
    self.frame_sample_rate = frame_sample_rate
    self.crop_size = crop_size
    self.short_side = short_side
    self.num_segment = num_segment
    self.num_crop = num_crop
    self.test_num_segment = test_num_segment
    self.test_num_crop = test_num_crop
    self.video_dir = video_dir
    self.label_dir = label_dir

    # Annotation parsing is identical for every mode, so do it exactly once
    # (the original duplicated this read in all three branches).
    cleaned = pd.read_csv(label_dir, header=None, delimiter=' ')
    self.dataset_samples = list(cleaned.values[:, 0])
    self.label_array = list(cleaned.values[:, 2])

    # Shared tail of every pipeline: to-tensor conversion + ImageNet normalization.
    normalize = video_transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN,
                                           std=IMAGENET_DEFAULT_STD)

    if mode == 'train':
        # Training: scale, jittered resize, horizontal flip, random crop.
        self.data_transform = video_transforms.Compose([
            video_transforms.Resize(self.short_side, interpolation='bilinear'),
            video_transforms.RandomResize(ratio=(1, 1.25), interpolation='bilinear'),
            video_transforms.RandomHorizontalFlip(),
            video_transforms.RandomCrop(size=(self.crop_size, self.crop_size)),
            volume_transforms.ClipToTensor(),
            normalize,
        ])
    elif mode == 'validation':
        # Validation: deterministic resize + center crop.
        self.data_transform = video_transforms.Compose([
            video_transforms.Resize(self.short_side, interpolation='bilinear'),
            video_transforms.CenterCrop(size=(self.crop_size, self.crop_size)),
            volume_transforms.ClipToTensor(),
            normalize,
        ])
    else:  # mode == 'test' (guaranteed by the guard above)
        # Test: resize to crop_size (not short_side) then three-crop evaluation.
        self.data_transform = video_transforms.Compose([
            video_transforms.Resize(self.crop_size, interpolation='bilinear'),
            video_transforms.VideoThreeCrop(size=(self.crop_size, self.crop_size)),
            volume_transforms.ClipToTensor(),
            normalize,
        ])