in datasets/base_video_dataset.py [0:0]
def _sample(cls, video_path: Path, fps: float, start: float, end: float,
            df_row: pd.DataFrame, frames_per_clip: int, frame_rate: float,
            sample_strategy: str, reader_fn: nn.Module,
            rng: np.random.Generator):
    """
    Read a fixed-length clip from the [start, end] segment of a video.

    Need this since VideoClip/RandomSampler etc are not quite compatible
    with this dataset. So recreating that here. Picks a temporal window
    inside [start, end] according to `sample_strategy`, reads it with
    `reader_fn`, subsamples it to the requested frame rate and pads/crops
    it to exactly `frames_per_clip` frames.

    Args:
        video_path: The path to read the video from.
        fps: What this video's natural FPS is. If <= 0 the video is
            treated as likely empty and `frame_rate` is used instead.
        start, end: floats of the start and end point in seconds.
            Negative values are clamped to 0.
        df_row: Forwarded untouched to `reader_fn`.
        frames_per_clip: Number of frames in the returned clip.
        frame_rate: Requested FPS of the returned clip. If None, the
            video's natural `fps` is used (i.e. no subsampling).
        sample_strategy: One of SAMPLE_STRAT_RAND / SAMPLE_STRAT_CNTR /
            SAMPLE_STRAT_LAST / SAMPLE_STRAT_FIRST; decides where in
            [start, end] the window is placed.
        reader_fn: Callable invoked as
            `reader_fn(path, new_start, new_end, fps, df_row,
            pts_unit='sec')` that returns a 3-tuple whose first element
            is the video tensor (frames along dim 0) and whose last
            element is an info dict.
        rng: Random generator used by the random strategy.
    Returns:
        A tuple `(video, video_frame_sec, video_without_fps_subsample,
        sampled_frames, info)`:
        video: clip between start', end' with `frames_per_clip` frames.
            An all-zero (frames_per_clip, 100, 100, 3) uint8 tensor if
            the read clip was empty or not 4-dimensional.
        video_frame_sec: per-frame time stamp in seconds (-1 for every
            frame of a generated empty clip).
        video_without_fps_subsample: what `reader_fn` returned, before
            subsampling/padding/cropping.
        sampled_frames: int64 indices, into the un-subsampled clip, of
            the frames that make up `video`.
        info: the reader's info dict, with 'video_fps' overwritten by
            the requested FPS.
    Raises:
        NotImplementedError: if `sample_strategy` is not recognised.
    """
    # No way can read negative time anyway
    start = max(start, 0)
    end = max(end, 0)
    if fps <= 0:
        logging.error('Found %f FPS video => likely empty [%s].', fps,
                      video_path)
        # So code works, will anyway return black frames.
        # NOTE(review): if frame_rate is None as well, fps becomes None
        # and the arithmetic below raises TypeError.
        fps = frame_rate
    req_fps = frame_rate if frame_rate is not None else fps
    nframes = int(fps * (end - start))
    # Number of native-fps frames spanning frames_per_clip frames at
    # req_fps.
    frames_to_ext = int(round(frames_per_clip * (fps / req_fps)))
    # Find a point in the video and crop out
    if sample_strategy == SAMPLE_STRAT_RAND:
        start_frame = max(nframes - frames_to_ext, 0)
        if start_frame > 0:
            start_frame = rng.integers(start_frame)
    elif sample_strategy == SAMPLE_STRAT_CNTR:
        start_frame = max((nframes - frames_to_ext) // 2, 0)
    elif sample_strategy == SAMPLE_STRAT_LAST:
        start_frame = max(nframes - frames_to_ext, 0)
    elif sample_strategy == SAMPLE_STRAT_FIRST:
        start_frame = 0
    else:
        raise NotImplementedError(f'Unknown {sample_strategy}')
    keep_last = sample_strategy == SAMPLE_STRAT_LAST
    new_start = start + max(start_frame / fps, 0)
    new_end = start + max((start_frame + frames_to_ext) / fps, 0)
    # Do not bleed out.. since this function could be used for anticipation
    # as well
    new_end = max(min(end, new_end), 0)
    # Start from the beginning of the video in case anticipation made it
    # go even further back
    new_start = min(max(new_start, 0), new_end)
    video, _, info = reader_fn(str(video_path), new_start, new_end, fps,
                               df_row, pts_unit='sec')
    if new_start >= new_end:
        # Degenerate window: every frame gets the same time stamp
        video_frame_sec = new_start * torch.ones((video.size(0), ))
    else:
        video_frame_sec = torch.linspace(new_start, new_end, video.size(0))
    assert video_frame_sec.size(0) == video.size(0)
    # Subsample the video to the req_fps
    stride = max(int(round(fps / req_fps)), 1)
    if keep_last:
        # From the back, so that the very last frame is always kept
        frames_to_keep = range(len(video))[::-stride][::-1]
    else:
        # Otherwise this is fine
        frames_to_keep = range(len(video))[::stride]
    # Convert video to the required fps
    video_without_fps_subsample = video
    video = video[frames_to_keep]
    video_frame_sec = video_frame_sec[frames_to_keep]
    sampled_frames = torch.LongTensor(frames_to_keep)
    info['video_fps'] = req_fps
    # Ideally could have done the following operations only on the
    # frames_to_keep and done the above slice after, but to avoid bugs
    # and ensuring reproducibility (since earlier it was done separately),
    # just doing on all separately
    # Pad the video with the last frame, or crop out the extra frames
    # so that it is consistent with the frames_per_clip
    vid_t = video.size(0)
    if video.ndim != 4 or (video.size(0) * video.size(1) * video.size(2) *
                           video.size(3)) == 0:
        # Empty clip if any of the dims are 0, corrupted file likely
        logging.warning('Generating empty clip...')
        video = torch.zeros((frames_per_clip, 100, 100, 3),
                            dtype=torch.uint8)
        video_frame_sec = -torch.ones((frames_per_clip, ))
        # torch.arange, not the deprecated end-inclusive torch.range: we
        # want exactly frames_per_clip indices 0..frames_per_clip-1
        # irrespective of the sampling strategy.
        sampled_frames = torch.arange(frames_per_clip, dtype=torch.int64)
    elif vid_t < frames_per_clip:
        npad = frames_per_clip - vid_t
        logging.debug('Too few frames read, padding with %d frames', npad)

        def padding_fn(T):
            if keep_last:
                # Repeat the first frame, the clip must end at the end
                return torch.cat([T[:1]] * npad + [T], dim=0)
            # Repeat the last frame
            return torch.cat([T] + [T[-1:]] * npad, dim=0)

        video = padding_fn(video)
        video_frame_sec = padding_fn(video_frame_sec)
        sampled_frames = padding_fn(sampled_frames)
    # Crop out any extra frames, keeping the end that matters for the
    # sampling strategy
    if keep_last:
        video = video[-frames_per_clip:]
        video_frame_sec = video_frame_sec[-frames_per_clip:]
        sampled_frames = sampled_frames[-frames_per_clip:]
    else:
        video = video[:frames_per_clip]
        video_frame_sec = video_frame_sec[:frames_per_clip]
        sampled_frames = sampled_frames[:frames_per_clip]
    # TODO(rgirdhar): Resample the audio in the same way too..
    return (video, video_frame_sec, video_without_fps_subsample,
            sampled_frames, info)