in datasets/HT100MDataset.py [0:0]
def __getitem__(self, idx):
# Build one sample for position `idx`: collect `self.num_clips` decoded
# (video, caption) pairs for the selected video, append cloned / flipped
# copies of them, and return `(video, text, label, vid_idx, index_capped)`.
#
# Decoding is attempted up to `self._num_retries` times; a RuntimeError is
# raised on the last attempt (see the raise below). `idx` may be replaced by
# a random position part-way through the retries.
#
# NOTE(review): this copy of the method has lost its indentation, so the
# nesting of several statements cannot be read off the text. Comments that
# depend on nesting are written as assumptions -- confirm against the
# properly indented file.
# NOTE(review): `video_list`, `text_list` and `index_capped` are only assigned
# inside the retry loop; if `self._num_retries` were 0 they would be unbound
# when used further down.
for i_try in range(self._num_retries):
# Get video id and path
# `idx` is mapped through `self.valid_indices` to a dataset-level index, and
# element 0 of that `_path_to_videos` entry is used as the video id.
index_capped = self.valid_indices[idx]
video_id = self._path_to_videos[index_capped][0]
video_path = os.path.join(self.video_root, video_id + '.mp4')
video_list = []
text_list = []
# NOTE(review): `audio_list` is never read or appended to in this method.
audio_list = []
# Keep fetching clips until `self.num_clips` have been collected.
while len(video_list) < self.num_clips:
# Get caption
# `_get_text` receives the caption CSV path for this video and returns a
# (text, start, end) triple; `start` / `end` are passed on to the decoder.
text, start, end = self._get_text(os.path.join(self.caption_root, video_id + '.csv'))
# Decode video
# `video` stays None if decoding raises, which marks the clip as failed.
video = None
try:
# NOTE(review): `start_sec` is not used anywhere else in this method.
video, start_sec = self._get_video_ffmpeg(video_path, start, end)
except Exception as e:
# Best-effort: any decoding error is printed and handled as a failed clip.
print(f"Failed to load video from {video_path} with error {e}")
if video is None:
# let's try another video
# Once more than half of the retries are used up, switch to a random
# position in `self.valid_indices`; before that the same `idx` is kept.
if i_try > self._num_retries // 2:
idx = random.randint(0, len(self.valid_indices) - 1)
# Presumably abandons the clip-collection loop so a new attempt can start.
break
video_list.append(video)
text_list.append(text)
# Presumably at retry-loop level: stop retrying once enough clips are collected.
if len(video_list) == self.num_clips:
break
# Give up on the final attempt (presumably only reached when the check above
# did not break out first).
if i_try == self._num_retries - 1:
raise RuntimeError(
"Failed to fetch video after {} retries.".format(
self._num_retries
)
)
# Add reversal option
# For each originally collected clip, extra copies are appended to the same
# lists that hold the originals.
for i in range(self.num_clips):
# Clone frames and text so the flips below work on copies, not on the
# entries already stored in the lists.
frames = video_list[i].clone()
text = text_list[i].clone()
for r_ix in range(self.num_reverse_clips):
# On odd `r_ix`, flip the frames along dim 1 and the text along dim 0.
# The flipped values are kept in `frames` / `text` for later iterations.
if r_ix % 2 == 1:
frames = frames.flip(1) # C T H W
text = text.flip(0) # T
# NOTE(review): with the indentation lost it is unclear whether these appends
# run once per `r_ix` or once per clip -- confirm; the permutation below
# depends on how many entries end up in each list.
video_list.append(frames)
text_list.append(text)
# With exactly two reverse clips, reorder the first four entries as
# [0, 2, 1, 3]; this indexes positions 0..3, so both lists must hold at least
# four entries here (otherwise IndexError), and only those four are kept.
if self.num_reverse_clips == 2:
video_list = [video_list[i] for i in [0, 2, 1, 3]]
text_list = [text_list[i] for i in [0, 2, 1, 3]]
# Multiple clips are concatenated along dim 0; with a single clip only the
# first list entry is returned, so any appended copies are not part of the
# output in that case.
if self.num_clips > 1:
video = th.cat(video_list, dim=0)
text = th.cat(text_list, dim=0)
else:
video = video_list[0]
text = text_list[0]
# The label is always 0, and `vid_idx` is the same value as `index_capped`,
# so that value appears twice in the returned tuple.
label = 0
vid_idx = index_capped
return video, text, label, vid_idx, index_capped