in datasets/video_db.py
def _get_clip(self, clip_idx, video_ctr, audio_ctr, video_start_time, audio_start_time,
              video_clip_duration=None, audio_clip_duration=None):
    """Decode one clip from already-open AV containers and return it as a sample dict."""
    # Fall back to the dataset-wide clip durations when none are given.
    if video_clip_duration is None:
        video_clip_duration = self.video_clip_duration
    if audio_clip_duration is None:
        audio_clip_duration = self.audio_clip_duration

    sample = {}
    if self.return_video:
        # Decode frames at the requested fps; the loader also reports the actual
        # start time it managed to seek to.
        frames, fps, start_time = av_wrappers.av_load_video(
            video_ctr,
            video_fps=self.video_fps,
            start_time=video_start_time,
            duration=video_clip_duration,
        )
        if self.video_transform is not None:
            for t in self.video_transform:
                frames = t(frames)
        sample['frames'] = frames
        # Shift the audio start by the same offset the video seek introduced,
        # so the two streams stay aligned.
        audio_start_time = audio_start_time - (video_start_time - start_time)

    if self.return_audio:
        # Note: 'av_laod_audio' (sic) is the helper's name as defined in av_wrappers.
        samples, rate = av_wrappers.av_laod_audio(
            audio_ctr,
            audio_fps=self.audio_fps,
            start_time=audio_start_time,
            duration=audio_clip_duration,
        )
        if self.audio_transform is not None:
            # Audio transforms may be a list (applied in sequence) or a single callable.
            if isinstance(self.audio_transform, list):
                for t in self.audio_transform:
                    samples, rate = t(samples, rate, audio_clip_duration)
            else:
                samples, rate = self.audio_transform(samples, rate)
        sample['audio'] = samples

    if self.return_labels:
        lbl = self.labels[clip_idx]
        # Convert numpy labels to tensors; leave other label types untouched.
        if isinstance(lbl, np.ndarray):
            sample['label'] = torch.from_numpy(lbl)
        else:
            sample['label'] = lbl

    if self.return_index:
        sample['index'] = clip_idx

    return sample
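
# --- Hedged usage sketch (illustrative, not part of the original file) ---
# _get_clip expects already-open containers (av_wrappers presumably wraps PyAV)
# plus the start times chosen by the caller. The class name, file path, and
# durations below are assumptions for illustration only.
#
# import av
# db = VideoDataset(...)                          # hypothetical dataset instance
# video_ctr = av.open('/path/to/video.mp4')
# audio_ctr = av.open('/path/to/video.mp4')       # separate handle for the audio stream
# sample = db._get_clip(
#     clip_idx=0,
#     video_ctr=video_ctr,
#     audio_ctr=audio_ctr,
#     video_start_time=0.0,
#     audio_start_time=0.0,
#     video_clip_duration=2.0,                    # seconds; None falls back to db defaults
#     audio_clip_duration=2.0,
# )
# # Depending on return_video / return_audio / return_labels / return_index,
# # `sample` holds 'frames', 'audio', 'label', and 'index'.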