def get_data()

in sat/data_video.py [0:0]


    def get_data(self, root, filename):
        """Load one video clip plus its caption and package them as a sample.

        Frame selection depends on how much usable footage there is after
        dropping ``self.skip_frms_num`` frames from each end of the video:

        1. Enough footage at the target ``self.fps`` (and the source frame rate
           is at least ``self.fps``): pick a random window and subsample it
           with a fixed stride.
        2. More than ``self.max_num_frames`` usable frames otherwise: subsample
           the whole trimmed range with a fixed stride.
        3. Anything shorter: take consecutive frames, truncated to the largest
           ``4k + 1`` count that fits (required by the 3D VAE).

        The result is padded/cut by ``pad_last_frame`` to ``max_num_frames``,
        permuted to ``[T, C, H, W]``, passed through
        ``resize_for_rectangle_crop`` and rescaled with ``(x - 127.5) / 127.5``.

        Args:
            root: Directory containing the video file.
            filename: Video file name relative to ``root``. The caption is
                looked up by replacing ``".mp4"`` with ``".txt"`` in the file
                name and every ``"videos"`` with ``"labels"`` in the full path.

        Returns:
            A dict with keys ``"mp4"`` (frame tensor), ``"txt"`` (first line of
            the caption file, or ``""`` when the file does not exist),
            ``"num_frames"`` and ``"fps"``; or ``None`` if anything fails while
            reading or processing the video (the error is printed).
        """
        # Built before the ``try`` so the handler below can always name the file,
        # even when the failure happens on the very first statement.
        video_path = os.path.join(root, filename)
        try:
            video_size, fps, max_num_frames, skip_frms_num = (
                self.video_size,
                self.fps,
                self.max_num_frames,
                self.skip_frms_num,
            )
            vr = VideoReader(uri=video_path, height=-1, width=-1)
            actual_fps = vr.get_avg_fps()
            ori_vlen = len(vr)

            def read_frames(first, last):
                """Decode the consecutive frames ``[first, last)`` as a torch tensor."""
                frames = vr.get_batch(np.arange(first, last))
                if frames is None:
                    raise ValueError("video reader returned no frames")
                # The reader may hand back either a tensor or a numpy array.
                return frames if isinstance(frames, torch.Tensor) else torch.from_numpy(frames)

            def subsample(frames, first, last, count):
                """Pick every ``(last - first) // count``-th frame of a decoded window."""
                # Both callers guarantee last - first >= count, so the stride is >= 1.
                indices = np.arange(first, last, (last - first) // count).astype(int)
                return frames[torch.tensor((indices - first).tolist())]

            def nearest_smaller_4k_plus_1(n):
                remainder = n % 4
                if remainder == 0:
                    return n - 3
                return n - remainder + 1

            if (ori_vlen - skip_frms_num * 2) / actual_fps * fps > max_num_frames and actual_fps >= fps:
                # Long enough at the target fps: sample a random temporal window.
                num_frames = max_num_frames
                start = random.randint(skip_frms_num, ori_vlen - skip_frms_num - int(num_frames / fps * actual_fps))
                end = int(start + num_frames / fps * actual_fps)
                end_safty = min(int(start + num_frames / fps * actual_fps), int(ori_vlen))
                tensor_frms = subsample(read_frames(start, end_safty), start, end, num_frames)
            elif ori_vlen - skip_frms_num * 2 > max_num_frames:
                # More frames than needed: stride over the whole trimmed range.
                num_frames = max_num_frames
                start = int(skip_frms_num)
                end = int(ori_vlen - skip_frms_num)
                tensor_frms = subsample(read_frames(start, end), start, end, num_frames)
            else:
                # Short video: keep consecutive frames.
                start = int(skip_frms_num)
                end = int(ori_vlen - skip_frms_num)
                num_frames = nearest_smaller_4k_plus_1(
                    end - start
                )  # 3D VAE requires the number of frames to be 4k+1
                if num_frames <= 0:
                    # Nothing left after trimming both ends; fail with a clear
                    # message instead of relying on a downstream indexing error.
                    raise ValueError(
                        f"no usable frames: video has {ori_vlen} frames, skipping {skip_frms_num} at each end"
                    )
                end = int(start + num_frames)
                tensor_frms = read_frames(start, end)

            tensor_frms = pad_last_frame(
                tensor_frms, max_num_frames
            )  # the stride sampling may yield a frame count different from max_num_frames
            tensor_frms = tensor_frms.permute(0, 3, 1, 2)  # [T, H, W, C] -> [T, C, H, W]
            tensor_frms = resize_for_rectangle_crop(tensor_frms, video_size, reshape_mode="center")
            tensor_frms = (tensor_frms - 127.5) / 127.5

            # caption
            caption_path = os.path.join(root, filename.replace(".mp4", ".txt")).replace("videos", "labels")
            if os.path.exists(caption_path):
                # Context manager so the handle is closed even if reading fails.
                with open(caption_path, "r") as caption_file:
                    caption = caption_file.read().splitlines()[0]
            else:
                caption = ""
            item = {
                "mp4": tensor_frms,
                "txt": caption,
                "num_frames": num_frames,
                "fps": fps,
            }
            return item
        except Exception as e:
            print(f"ERROR when reading video {video_path}, trying to read a valid one. ERROR msg: {e}")
            return None