def __getitem__()

in data/realestate10k.py [0:0]


    def __getitem__(self, index):
        index = self.rng.randint(self.imageset.shape[0])
        # index = index % self.imageset.shape[0]
        # Load text file containing frame information
        frames = np.loadtxt(
            self.base_file
            + "/frames/%s/%s.txt" % (self.dataset, self.imageset[index])
        )

        image_index = self.rng.choice(frames.shape[0], size=(1,))[0]

        # Chose 15 images within 30 frames of the iniital one
        image_indices = self.rng.randint(80, size=(30,)) - 40 + image_index
        image_indices = np.minimum(
            np.maximum(image_indices, 0), frames.shape[0] - 1
        )

        # Look at the change in angle and choose a hard one
        angles = []
        translations = []
        for viewpoint in range(0, image_indices.shape[0]):
            orig_viewpoint = frames[image_index, 7:].reshape(3, 4)
            new_viewpoint = frames[image_indices[viewpoint], 7:].reshape(3, 4)
            dang, dtrans = get_deltas(orig_viewpoint, new_viewpoint)

            angles += [dang]
            translations += [dtrans]

        angles = np.array(angles)
        translations = np.array(translations)

        mask = image_indices[
            (angles > self.ANGLE_THRESH) | (translations > self.TRANS_THRESH)
        ]

        rgbs = []
        cameras = []
        for i in range(0, self.num_views):
            if i == 0:
                t_index = image_index
            elif mask.shape[0] > 5:
                # Choose a harder angle change
                t_index = mask[self.rng.randint(mask.shape[0])]
            else:
                t_index = image_indices[
                    self.rng.randint(image_indices.shape[0])
                ]

            image = Image.open(
                self.base_file
                + "/frames/%s/%s/" % (self.dataset, self.imageset[index])
                + str(int(frames[t_index, 0]))
                + ".png"
            )
            rgbs += [self.input_transform(image)]

            intrinsics = frames[t_index, 1:7]
            extrinsics = frames[t_index, 7:]

            origK = np.array(
                [
                    [intrinsics[0], 0, intrinsics[2]],
                    [0, intrinsics[1], intrinsics[3]],
                    [0, 0, 1],
                ],
                dtype=np.float32,
            )
            K = np.matmul(self.offset, origK)

            origP = extrinsics.reshape(3, 4)
            P = np.matmul(K, origP)  # Merge these together to match habitat
            P = np.vstack((P, np.zeros((1, 4), dtype=np.float32))).astype(
                np.float32
            )
            P[3, 3] = 1

            Pinv = np.linalg.inv(P)

            cameras += [
                {
                    "P": P,
                    "Pinv": Pinv,
                    "OrigP": origP,
                    "K": self.K,
                    "Kinv": self.invK,
                }
            ]

        return {"images": rgbs, "cameras": cameras}