in datasets/video_db.py [0:0]
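The dataset's __getitem__ takes one of two paths: in 'clip' mode it returns a single randomly positioned audio/video snippet, skipping to the next index whenever loading fails or returns nothing; in any other mode it loads the entire video and slices it into clips_per_video evenly spaced, overlapping chunks.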
def __getitem__(self, index):
    if self.mode == 'clip':
        try:
            # Sample a random, short audio/video snippet from this video.
            sample_idx = index % self.num_samples
            video_ctr, audio_ctr = self._load_sample(sample_idx)
            v_ss, v_dur, a_ss, a_dur = self._sample_snippet(video_ctr, audio_ctr)
            sample = self._get_clip(sample_idx, video_ctr, audio_ctr, v_ss, a_ss,
                                    video_clip_duration=v_dur, audio_clip_duration=a_dur)
            if sample is None:
                return self[(index + 1) % len(self)]
            return sample
        except Exception:
            # Loading/decoding failed; fall back to the next sample.
            return self[(index + 1) % len(self)]
    else:
        video_ctr, audio_ctr = self._load_sample(index)

        # Load the entire video, restricted to the span covered by both streams.
        vs, vf, ss, sf = self._get_time_lims(video_ctr, audio_ctr)
        start_time = vs
        final_time = vf
        if self.return_audio:
            start_time = max(vs, ss) if ss < 0 else vs
            final_time = min(vf, sf) if ss < 0 else vf
        if final_time <= start_time:
            # Degenerate span; force a minimum-length clip.
            final_time = start_time + max(self.video_clip_duration, self.audio_clip_duration)
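        # E.g. (hypothetical numbers): vs=0.0, vf=10.0 with audio spanning
        # ss=-0.2, sf=9.8 -> start_time=0.0, final_time=9.8, video_dur=9.8 s.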
        video_dur = final_time - start_time
        sample = self._get_clip(index, video_ctr, audio_ctr, start_time, start_time,
                                video_clip_duration=video_dur, audio_clip_duration=video_dur)

        # Split the video into clips_per_video evenly spaced, overlapping chunks.
        chunks = defaultdict(list)
        if self.return_video:
            nf = sample['frames'].shape[1]
            chunk_size = int(self.video_clip_duration * self.video_fps)
            if chunk_size >= nf:
                # Video is shorter than one chunk: repeat the whole thing.
                chunks['frames'] = torch.stack([sample['frames'] for _ in range(self.clips_per_video)])
            else:
                timestamps = np.linspace(0, max(nf - chunk_size, 1), self.clips_per_video).astype(int)
                chunks['frames'] = torch.stack([sample['frames'][:, ss:ss + chunk_size] for ss in timestamps])
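                # E.g. (hypothetical numbers): nf=240, chunk_size=64,
                # clips_per_video=10 -> starts = [0, 19, 39, ..., 176]:
                # evenly spaced 64-frame windows that overlap whenever
                # nf < clips_per_video * chunk_size.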
        if self.return_audio:
            # Same chunking for the audio track; audio_fps_out is presumably the
            # rate of the returned audio features, so chunk_size is in output samples.
            nf = sample['audio'].shape[1]
            chunk_size = int(self.audio_clip_duration * self.audio_fps_out)
            if chunk_size >= nf:
                chunks['audio'] = torch.stack([sample['audio'] for _ in range(self.clips_per_video)])
            else:
                timestamps = np.linspace(0, max(nf - chunk_size, 1), self.clips_per_video).astype(int)
                chunks['audio'] = torch.stack([sample['audio'][:, ss:ss + chunk_size] for ss in timestamps])
        if self.return_labels:
            chunks['label'] = sample['label']
        if self.return_index:
            # Pair the sample index with each chunk's start time.
            ts = torch.from_numpy(np.linspace(start_time, final_time - self.video_clip_duration, self.clips_per_video))
            chunks['index'] = torch.stack([sample['index'][:1].repeat(self.clips_per_video), ts.float()], dim=1)
        return chunks
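
For context, a minimal sketch of how the two modes might be consumed. MyVideoDataset, its constructor arguments, the tensor layout, and model are illustrative assumptions, not part of this file:

import torch
from torch.utils.data import DataLoader

# 'clip' mode: one random snippet per item (typical for training).
train_db = MyVideoDataset(mode='clip', return_video=True, return_audio=True)  # hypothetical subclass/args
train_loader = DataLoader(train_db, batch_size=32, num_workers=8, shuffle=True)

# Any other mode takes the full-video path: each item is a dict holding
# clips_per_video chunks, which suits dense evaluation.
eval_db = MyVideoDataset(mode='video', clips_per_video=10, return_labels=True)  # hypothetical args
for chunks in DataLoader(eval_db, batch_size=1):
    frames = chunks['frames'][0]     # assumed (clips_per_video, C, T, H, W)
    logits = model(frames)           # hypothetical model, one prediction per chunk
    video_pred = logits.mean(dim=0)  # average chunk predictions for the video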