torchbenchmark/models/Super_SloMo/dataloader.py
import torch.utils.data as data
from PIL import Image
import os
import os.path
import random
def _make_dataset(dir):
"""
Creates a 2D list of all the frames in N clips containing
M frames each.
2D List Structure:
[[frame00, frame01,...frameM] <-- clip0
     [frame00, frame01,...frameM] <-- clip1
:
[frame00, frame01,...frameM]] <-- clipN
Parameters
----------
dir : string
root directory containing clips.
Returns
-------
list
2D list described above.
"""
framesPath = []
# Find and loop over all the clips in root `dir`.
for index, folder in enumerate(os.listdir(dir)):
clipsFolderPath = os.path.join(dir, folder)
# Skip items which are not folders.
if not (os.path.isdir(clipsFolderPath)):
continue
framesPath.append([])
# Find and loop over all the frames inside the clip.
for image in sorted(os.listdir(clipsFolderPath)):
# Add path to list.
framesPath[index].append(os.path.join(clipsFolderPath, image))
return framesPath
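# Illustrative example (hypothetical layout): for a root directory `clips/`
# containing `clip0/` and `clip1/`, each holding `frame00.png` ... `frame11.png`,
# `_make_dataset("clips")` would return something like
#
#     [['clips/clip0/frame00.png', ..., 'clips/clip0/frame11.png'],
#      ['clips/clip1/frame00.png', ..., 'clips/clip1/frame11.png']]
#
# Note that clips are enumerated via os.listdir, so their order is not
# guaranteed to be sorted.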
def _make_video_dataset(dir):
"""
Creates a 1D list of all the frames.
1D List Structure:
[frame0, frame1,...frameN]
Parameters
----------
dir : string
root directory containing frames.
Returns
-------
list
1D list described above.
"""
framesPath = []
# Find and loop over all the frames in root `dir`.
for image in sorted(os.listdir(dir)):
# Add path to list.
framesPath.append(os.path.join(dir, image))
return framesPath
def _pil_loader(path, cropArea=None, resizeDim=None, frameFlip=0):
"""
Opens image at `path` using pil and applies data augmentation.
Parameters
----------
path : string
path of the image.
cropArea : tuple, optional
coordinates for cropping image. Default: None
resizeDim : tuple, optional
dimensions for resizing image. Default: None
frameFlip : int, optional
Non zero to flip image horizontally. Default: 0
Returns
-------
    Image
        augmented PIL image, converted to RGB.
"""
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
img = Image.open(f)
# Resize image if specified.
        resized_img = img.resize(resizeDim, Image.ANTIALIAS) if (resizeDim is not None) else img
        # Crop image if crop area specified (applied to the resized image so
        # that resizing and cropping compose).
        cropped_img = resized_img.crop(cropArea) if (cropArea is not None) else resized_img
# Flip image horizontally if specified.
flipped_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT) if frameFlip else cropped_img
return flipped_img.convert('RGB')
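# Illustrative example of the augmentation (hypothetical file name):
#
#     img = _pil_loader("frame00.png", cropArea=(0, 0, 352, 352), frameFlip=1)
#
# This crops the top-left 352x352 region, mirrors it horizontally, and
# converts the result to RGB.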
class SuperSloMo(data.Dataset):
"""
A dataloader for loading N samples arranged in this way:
|-- clip0
|-- frame00
|-- frame01
:
|-- frame11
|-- frame12
|-- clip1
|-- frame00
|-- frame01
:
|-- frame11
|-- frame12
:
:
|-- clipN
|-- frame00
|-- frame01
:
|-- frame11
|-- frame12
...
Attributes
----------
framesPath : list
        List of frame paths in the dataset.
Methods
-------
__getitem__(index)
Returns the sample corresponding to `index` from dataset.
__len__()
Returns the size of dataset. Invoked as len(datasetObj).
__repr__()
Returns printable representation of the dataset object.
"""
def __init__(self, root, transform=None, dim=(640, 360), randomCropSize=(352, 352), train=True):
"""
Parameters
----------
root : string
Root directory path.
transform : callable, optional
A function/transform that takes in
a sample and returns a transformed version.
            E.g., ``transforms.RandomCrop`` for images.
dim : tuple, optional
Dimensions of images in dataset. Default: (640, 360)
randomCropSize : tuple, optional
Dimensions of random crop to be applied. Default: (352, 352)
train : boolean, optional
Specifies if the dataset is for training or testing/validation.
`True` returns samples with data augmentation like random
flipping, random cropping, etc. while `False` returns the
samples without randomization. Default: True
"""
# Populate the list with image paths for all the
        # frames in `root`.
framesPath = _make_dataset(root)
# Raise error if no images found in root.
if len(framesPath) == 0:
raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n"))
self.randomCropSize = randomCropSize
self.cropX0 = dim[0] - randomCropSize[0]
self.cropY0 = dim[1] - randomCropSize[1]
self.root = root
self.transform = transform
self.train = train
self.framesPath = framesPath
def __getitem__(self, index):
"""
Returns the sample corresponding to `index` from dataset.
The sample consists of two reference frames - I0 and I1 -
and a random frame chosen from the 7 intermediate frames
        available between I0 and I1, along with its relative index.
Parameters
----------
index : int
Index
Returns
-------
tuple
(sample, returnIndex) where sample is
[I0, intermediate_frame, I1] and returnIndex is
            the position of `intermediate_frame`.
            E.g., the `returnIndex` of the frame next to I0 would be 0 and
            of the frame just before I1 would be 6.
"""
sample = []
if (self.train):
### Data Augmentation ###
            # Select a random 9-frame window from the 12 frames in the clip.
firstFrame = random.randint(0, 3)
# Apply random crop on the 9 input frames
cropX = random.randint(0, self.cropX0)
cropY = random.randint(0, self.cropY0)
cropArea = (cropX, cropY, cropX + self.randomCropSize[0], cropY + self.randomCropSize[1])
            # Randomly reverse the temporal order of the frames.
#frameRange = range(firstFrame, firstFrame + 9) if (random.randint(0, 1)) else range(firstFrame + 8, firstFrame - 1, -1)
IFrameIndex = random.randint(firstFrame + 1, firstFrame + 7)
if (random.randint(0, 1)):
frameRange = [firstFrame, IFrameIndex, firstFrame + 8]
returnIndex = IFrameIndex - firstFrame - 1
else:
frameRange = [firstFrame + 8, IFrameIndex, firstFrame]
returnIndex = firstFrame - IFrameIndex + 7
# Random flip frame
randomFrameFlip = random.randint(0, 1)
else:
# Fixed settings to return same samples every epoch.
# For validation/test sets.
firstFrame = 0
cropArea = (0, 0, self.randomCropSize[0], self.randomCropSize[1])
IFrameIndex = ((index) % 7 + 1)
returnIndex = IFrameIndex - 1
frameRange = [0, IFrameIndex, 8]
randomFrameFlip = 0
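        # Worked example of the training-mode indexing above (illustrative
        # only): with firstFrame = 2 and IFrameIndex = 5, the forward order
        # loads frames [2, 5, 10] with returnIndex = 5 - 2 - 1 = 2, while the
        # reversed order loads frames [10, 5, 2] with
        # returnIndex = 2 - 5 + 7 = 4.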
        # Loop over all the frames corresponding to `index`.
for frameIndex in frameRange:
# Open image using pil and augment the image.
image = _pil_loader(self.framesPath[index][frameIndex], cropArea=cropArea, frameFlip=randomFrameFlip)
# Apply transformation if specified.
if self.transform is not None:
image = self.transform(image)
sample.append(image)
return sample, returnIndex
def __len__(self):
"""
Returns the size of dataset. Invoked as len(datasetObj).
Returns
-------
int
number of samples.
"""
return len(self.framesPath)
def __repr__(self):
"""
Returns printable representation of the dataset object.
Returns
-------
string
info.
"""
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
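# Example usage (illustrative; "./data/train" is a hypothetical directory laid
# out as clipN/frameMM above, and torchvision's transforms.ToTensor is one
# possible transform):
#
#     trainset = SuperSloMo("./data/train", transform=transforms.ToTensor())
#     frames, idx = trainset[0]   # frames == [I0, intermediate_frame, I1]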
class UCI101Test(data.Dataset):
"""
A dataloader for loading N samples arranged in this way:
|-- clip0
|-- frame00
|-- frame01
|-- frame02
|-- clip1
|-- frame00
|-- frame01
|-- frame02
:
:
|-- clipN
|-- frame00
|-- frame01
|-- frame02
...
Attributes
----------
framesPath : list
        List of frame paths in the dataset.
Methods
-------
__getitem__(index)
Returns the sample corresponding to `index` from dataset.
__len__()
Returns the size of dataset. Invoked as len(datasetObj).
__repr__()
Returns printable representation of the dataset object.
"""
def __init__(self, root, transform=None):
"""
Parameters
----------
root : string
Root directory path.
transform : callable, optional
A function/transform that takes in
a sample and returns a transformed version.
            E.g., ``transforms.RandomCrop`` for images.
"""
# Populate the list with image paths for all the
        # frames in `root`.
framesPath = _make_dataset(root)
# Raise error if no images found in root.
if len(framesPath) == 0:
            raise RuntimeError("Found 0 files in subfolders of: " + root + "\n")
self.root = root
self.framesPath = framesPath
self.transform = transform
def __getitem__(self, index):
"""
Returns the sample corresponding to `index` from dataset.
The sample consists of two reference frames - I0 and I1 -
        and an intermediate frame between I0 and I1.
Parameters
----------
index : int
Index
Returns
-------
tuple
(sample, returnIndex) where sample is
[I0, intermediate_frame, I1] and returnIndex is
the position of `intermediate_frame`.
            `returnIndex` is always 3; it is returned to maintain
            compatibility with the `SuperSloMo` dataloader, where 3
            corresponds to the middle frame.
"""
sample = []
        # Loop over all the frames corresponding to `index`.
for framePath in self.framesPath[index]:
# Open image using pil.
image = _pil_loader(framePath)
# Apply transformation if specified.
if self.transform is not None:
image = self.transform(image)
sample.append(image)
return sample, 3
def __len__(self):
"""
Returns the size of dataset. Invoked as len(datasetObj).
Returns
-------
int
number of samples.
"""
return len(self.framesPath)
def __repr__(self):
"""
Returns printable representation of the dataset object.
Returns
-------
string
info.
"""
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
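# Example usage (illustrative; "./data/test" is a hypothetical directory with
# three-frame clips as shown above):
#
#     testset = UCI101Test("./data/test", transform=transforms.ToTensor())
#     frames, idx = testset[0]    # frames == [I0, middle_frame, I1], idx == 3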
class Video(data.Dataset):
"""
A dataloader for loading all video frames in a folder:
|-- frame0
|-- frame1
:
:
|-- frameN
...
Attributes
----------
framesPath : list
        List of frame paths in the dataset.
origDim : tuple
original dimensions of the video.
dim : tuple
resized dimensions of the video (for CNN).
Methods
-------
__getitem__(index)
Returns the sample corresponding to `index` from dataset.
__len__()
Returns the size of dataset. Invoked as len(datasetObj).
__repr__()
Returns printable representation of the dataset object.
"""
def __init__(self, root, transform=None):
"""
Parameters
----------
root : string
Root directory path.
transform : callable, optional
A function/transform that takes in
a sample and returns a transformed version.
            E.g., ``transforms.RandomCrop`` for images.
"""
# Populate the list with image paths for all the
        # frames in `root`.
        framesPath = _make_video_dataset(root)
        # Raise error if no images found in root (checked before reading
        # the first frame).
        if len(framesPath) == 0:
            raise RuntimeError("Found 0 files in: " + root + "\n")
        # Get dimensions of frames.
        frame = _pil_loader(framesPath[0])
        self.origDim = frame.size
        # Round both dimensions down to the nearest multiple of 32 for the CNN.
        self.dim = int(self.origDim[0] / 32) * 32, int(self.origDim[1] / 32) * 32
self.root = root
self.framesPath = framesPath
self.transform = transform
def __getitem__(self, index):
"""
Returns the sample corresponding to `index` from dataset.
The sample consists of two reference frames - I0 and I1.
Parameters
----------
index : int
Index
Returns
-------
list
sample is [I0, I1] where I0 is the frame with index
`index` and I1 is the next frame.
"""
sample = []
        # Loop over the pair of consecutive frames starting at `index`.
for framePath in [self.framesPath[index], self.framesPath[index + 1]]:
# Open image using pil.
image = _pil_loader(framePath, resizeDim=self.dim)
# Apply transformation if specified.
if self.transform is not None:
image = self.transform(image)
sample.append(image)
return sample
def __len__(self):
"""
Returns the size of dataset. Invoked as len(datasetObj).
Returns
-------
int
number of samples.
"""
        # Use `len - 1` so that the dataloader only accesses frame pairs up to
        # [N-1, N] and never [N, N+1], since frame N+1 doesn't exist.
return len(self.framesPath) - 1
def __repr__(self):
"""
Returns printable representation of the dataset object.
Returns
-------
string
info.
"""
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
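if __name__ == "__main__":
    # Minimal usage sketch for the Video dataset (illustrative only, not part
    # of the benchmark harness). Assumptions: a local "./frames" directory
    # holding the extracted video frames, and torchvision installed for
    # ToTensor().
    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader

    videoset = Video("./frames", transform=transforms.ToTensor())
    videoloader = DataLoader(videoset, batch_size=1, shuffle=False)
    for frame0, frame1 in videoloader:
        # Each batch is a consecutive frame pair, resized to multiples of 32.
        print(frame0.shape, frame1.shape)
        break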