def __getitem__()

in datasets/HT100MDataset.py [0:0]


    def __getitem__(self, idx):
        """Load ``self.num_clips`` (clip, caption) pairs for one video.

        The video is looked up through ``self.valid_indices[idx]``. Each clip
        is obtained by drawing a caption with ``self._get_text`` and decoding
        the matching span with ``self._get_video_ffmpeg``. If decoding fails,
        the whole sample is retried, up to ``self._num_retries`` attempts;
        during the later attempts (``i_try > self._num_retries // 2``) a
        random replacement index is drawn instead of reusing ``idx``.

        Args:
            idx: Position into ``self.valid_indices``.

        Returns:
            A tuple ``(video, text, label, vid_idx, index_capped)``.
            ``video`` and ``text`` are the collected clips / captions
            concatenated along dim 0 when ``self.num_clips > 1``, otherwise
            the first clip / caption alone. ``label`` is always ``0`` and
            ``vid_idx`` equals ``index_capped``, the dataset index of the
            video that was actually loaded.

        Raises:
            RuntimeError: If no attempt produced ``self.num_clips`` clips.
        """
        for i_try in range(self._num_retries):
            # Resolve the video on every attempt: ``idx`` may have been
            # re-drawn by a previous failed attempt.
            index_capped = self.valid_indices[idx]
            video_id = self._path_to_videos[index_capped][0]
            video_path = os.path.join(self.video_root, video_id + '.mp4')
            caption_path = os.path.join(self.caption_root, video_id + '.csv')
            video_list = []
            text_list = []

            while len(video_list) < self.num_clips:
                # Get caption
                text, start, end = self._get_text(caption_path)

                # Decode video; any decoding error counts as a failed clip.
                video = None
                try:
                    video, _ = self._get_video_ffmpeg(video_path, start, end)
                except Exception as e:
                    print(f"Failed to load video from {video_path} with error {e}")
                if video is None:
                    # Give up on this attempt; once more than half of the
                    # retries are used, switch to a random other video.
                    if i_try > self._num_retries // 2:
                        idx = random.randint(0, len(self.valid_indices) - 1)
                    break

                video_list.append(video)
                text_list.append(text)

            if len(video_list) == self.num_clips:
                break
        else:
            # Only reached when no attempt succeeded. Checking the loop
            # counter instead would also reject a success on the last try.
            raise RuntimeError(
                "Failed to fetch video after {} retries.".format(
                    self._num_retries
                )
            )

        # Add reversal option
        for i in range(self.num_clips):
            # Clone so the flips never alias the forward clip / caption.
            frames = video_list[i].clone()
            text = text_list[i].clone()

            for r_ix in range(self.num_reverse_clips):
                # Flips accumulate across odd steps: the first appended copy
                # is reversed, the next one is back in forward order, etc.
                if r_ix % 2 == 1:
                    frames = frames.flip(1)  # C T H W (per original note)
                    text = text.flip(0)  # T

                    video_list.append(frames)
                    text_list.append(text)

        if self.num_reverse_clips == 2:
            # Exactly one reversed copy per clip was appended after all the
            # forward clips; interleave so each clip is followed by its
            # reverse. For num_clips == 2 this is the order [0, 2, 1, 3].
            order = [
                j
                for i in range(self.num_clips)
                for j in (i, i + self.num_clips)
            ]
            video_list = [video_list[j] for j in order]
            text_list = [text_list[j] for j in order]

        if self.num_clips > 1:
            video = th.cat(video_list, dim=0)
            text = th.cat(text_list, dim=0)
        else:
            video = video_list[0]
            text = text_list[0]

        label = 0
        vid_idx = index_capped

        return video, text, label, vid_idx, index_capped