in datasets/HT100MDataset.py [0:0]
def _get_video_ffmpeg(self, video_path, start, end):
    """Decode a random clip from *video_path* with ffmpeg.

    Seeks to a random second inside ``[start, end]``, decodes
    ``self.num_sec`` seconds at ``self.fps``, crops (and, unless
    ``self.crop_only``, rescales) to a ``self.size`` x ``self.size``
    square, optionally horizontally flips, and returns the frames as a
    float tensor in ``[0, 1]``.

    Args:
        video_path: path of the video file to decode.
        start: earliest allowed clip start, in seconds (assumed int —
            ``random.randint`` would raise on floats; TODO confirm callers).
        end: end of the usable segment, in seconds.

    Returns:
        Tuple ``(video, start_seek)`` where ``video`` is a float32 tensor
        of shape ``(3, self.num_frames, self.size, self.size)`` scaled to
        ``[0, 1]``, and ``start_seek`` is the chosen seek offset in seconds.
    """
    # Choose a random start so a num_sec clip fits inside [start, end];
    # max() guards segments shorter than num_sec (then we seek to `start`).
    start_seek = random.randint(start, int(max(start, end - self.num_sec)))
    cmd = (
        ffmpeg
        # +0.1s slack so rounding at the container level never yields
        # fewer frames than requested.
        .input(video_path, ss=start_seek, t=self.num_sec + 0.1)
        .filter('fps', fps=self.fps)
    )
    # Crop anchor: fixed center, or uniform-random for augmentation.
    if self.center_crop:
        aw, ah = 0.5, 0.5
    else:
        aw, ah = random.uniform(0, 1), random.uniform(0, 1)
    if self.crop_only:
        # Cut a size x size window directly from the frame (no rescale).
        cmd = (
            cmd.crop('(iw - {})*{}'.format(self.size, aw),
                     '(ih - {})*{}'.format(self.size, ah),
                     str(self.size), str(self.size))
        )
    else:
        # Cut the largest centered-or-random square, then rescale it
        # down to size x size.
        cmd = (
            cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
                     '(ih - min(iw,ih))*{}'.format(ah),
                     'min(iw,ih)',
                     'min(iw,ih)')
            .filter('scale', self.size, self.size)
        )
    if self.random_flip and random.uniform(0, 1) > 0.5:
        cmd = cmd.hflip()
    # Decode raw RGB24 frames straight to memory over a pipe.
    out, _ = (
        cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True, quiet=True)
    )
    # np.frombuffer yields a READ-ONLY view of `out`; copy so the torch
    # tensor owns writable memory (avoids the non-writable-array
    # UserWarning from torch.from_numpy and undefined in-place writes).
    video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]).copy()
    video = th.from_numpy(video)
    # (T, H, W, C) -> (C, T, H, W)
    video = video.permute(3, 0, 1, 2)
    # Zero-pad along time if the decode produced fewer than num_frames frames.
    if video.shape[1] < self.num_frames:
        zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
        video = th.cat((video, zeros), dim=1)
    # Truncate to exactly num_frames and normalise pixel values to [0, 1].
    video = video.float() / 255.0
    return video[:, :self.num_frames], start_seek