def __getitem__()

in datasets/replica.py [0:0]


    def __getitem__(self, idx):
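        # Re-seed Python's RNG from system entropy (presumably so that forked
        # DataLoader workers do not all draw the same random trajectories).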
        random.seed()

        trajectory_len = self.seq_len * self.step

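        # Two sampling modes: with samples_per_epoch set, draw a random episode and a
        # random starting frame; otherwise, deterministically tile each episode into
        # non-overlapping trajectories and map idx to one of them.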
        if self.samples_per_epoch:
            idx = random.randint(0, len(self.seq_idxs) - 1)
            idxstr = self.seq_idxs[idx]
            seq_start = random.randint(0, self.episode_len - trajectory_len)
        else:
            trajectories_per_episode = math.floor(self.episode_len / trajectory_len)
            seq_idx = int(idx / trajectories_per_episode)
            seq_idx = int(self.seq_idxs[seq_idx])
            idxstr = str(seq_idx).zfill(2)

            seq_start = (idx % trajectories_per_episode) * trajectory_len
            seq_start = int(seq_start)

        # Load cameras
        episode_path = os.path.join(self.datapath, idxstr)
        with open(os.path.join(episode_path, 'cameras.json'), 'r') as f:
            cameras = json.load(f)

        Rt = []
        K = []
        rgb = []
        depth = []

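        # Gather seq_len frames spaced `step` apart starting at seq_start:
        # per-frame extrinsics (Rt), intrinsics (K), RGB images, and (optionally) depth.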
        sample_indices = list(range(seq_start, seq_start + (self.seq_len * self.step), self.step))
        for i in sample_indices:
            Rt.append(torch.Tensor(cameras[i]['Rt']))
            K.append(torch.Tensor(cameras[i]['K']))

            _rgb = os.path.join(episode_path, str(i).zfill(3) + '_rgb.png')
            _rgb = self.resize_transform_rgb(Image.open(_rgb))
            rgb.append(_rgb[:3, :, :])

            if self.depth:
                _depth = os.path.join(episode_path, str(i).zfill(3) + '_depth.tiff')
                # We don't want to normalize depth values
                _depth = self.resize_transform_depth(Image.open(_depth))
                depth.append(torch.from_numpy(np.array(_depth)).unsqueeze(0))

        rgb = torch.stack(rgb)
        if self.depth:
            depth = torch.stack(depth).float()
        K = torch.stack(K)
        Rt = torch.stack(Rt)

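        # normalize_trajectory operates on batched poses and (presumably) recenters /
        # reorients the camera path according to `center` and `normalize_rotation`.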
        Rt = Rt.unsqueeze(0)  # add batch dimension
        Rt = normalize_trajectory(Rt, center=self.center, normalize_rotation=self.normalize_rotation)
        Rt = Rt[0]  # remove batch dimension

        if self.single_sample_per_trajectory:
            selected_indices = torch.multinomial(torch.ones(Rt.shape[0]), num_samples=1).squeeze()
            rgb = rgb[selected_indices].unsqueeze(0)
            K = K[selected_indices].unsqueeze(0)
            Rt = Rt[selected_indices].unsqueeze(0)
            if self.depth:
                depth = depth[selected_indices].unsqueeze(0)

        if self.rot_aug:
            Rt = random_rotation_augment(Rt)

        # Normalize K to img_res
        K = K[:, :3, :3]

        # https://codeyarns.com/tech/2015-09-08-how-to-compute-intrinsic-camera-matrix-for-a-camera.html
        # images were rendered at 512x512 res
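        # With a 90 degree FOV at 512x512, f = (512 / 2) / tan(90 / 2 deg) = 256 px;
        # K appears to store focal lengths in normalized form, so scale them back
        # to pixel units here.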
        fx = 256.0 / np.tan(np.deg2rad(90.0) / 2)
        fy = 256.0 / np.tan(np.deg2rad(90.0) / 2)
        K[:, 0, 0] = K[:, 0, 0] * fx
        K[:, 1, 1] = K[:, 1, 1] * fy

        downsampling_ratio = self.img_res / 512
        K[:, 0, 0] = K[:, 0, 0] * downsampling_ratio
        K[:, 1, 1] = K[:, 1, 1] * downsampling_ratio
        if self.depth:
            depth = depth * 1000  # recommended scaling from game engine units to real-world units
            sample = {'rgb': rgb, 'depth': depth, 'K': K, 'Rt': Rt, 'scene_idx': idx}
        else:
            sample = {'rgb': rgb, 'K': K, 'Rt': Rt, 'scene_idx': idx}

        return sample
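
For context, a minimal usage sketch; the class name ReplicaDataset and the constructor arguments shown are assumptions based on the attributes referenced above, not confirmed by this file.

    # Hypothetical usage -- class name and constructor arguments are assumptions.
    from torch.utils.data import DataLoader

    dataset = ReplicaDataset(datapath='data/replica', img_res=64, seq_len=4, step=1, depth=True)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)

    for batch in loader:
        rgb = batch['rgb']      # [B, seq_len, 3, img_res, img_res]
        depth = batch['depth']  # [B, seq_len, 1, img_res, img_res]
        K = batch['K']          # [B, seq_len, 3, 3], scaled to img_res
        Rt = batch['Rt']        # [B, seq_len, 4, 4], normalized trajectory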