def get_singleenv_sample()

in data/create_rgb_dataset.py


    def get_singleenv_sample(self, num_views) -> Dict[str, List]:

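        # Every images_before_reset samples, reset the environment so that
        # subsequent views come from a new episode.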
        if self.num_samples % self.images_before_reset == 0:
            old_env = self.env._current_episode_index
            self.env.reset()
            print(
                "RESETTING %d to %d \n"
                % (old_env, self.env._current_episode_index),
                flush=True,
            )

        depths = []
        rgbs = []
        cameras = []
        semantics = []

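        # Reference pose for this sample: a random navigable point and a random
        # yaw, i.e. a quaternion for a rotation about the vertical (up) axis.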
        rand_location = self.env_sim.sample_navigable_point()
        if self.opts.image_type == "fixedRT_baseline":
            rand_angle = self.angle_rng.uniform(0, 2 * np.pi)
        else:
            rand_angle = self.rng.uniform(0, 2 * np.pi)
        rand_rotation = [0, np.sin(rand_angle / 2), 0, np.cos(rand_angle / 2)]
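        # Place the agent at the reference pose; the observation returned here is
        # overwritten inside the loop below.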
        obs = self.env_sim.get_observations_at(
            position=rand_location,
            rotation=rand_rotation,
            keep_agent_at_new_pose=True,
        )

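        # Render num_views views; how each view's pose is perturbed around the
        # reference depends on opts.image_type.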
        for i in range(0, num_views):
            position = rand_location.copy()
            rotation = rand_rotation.copy()
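            # "translation": shift the position only; "outpaint": jitter the
            # rotation only; "fixedRT_baseline": reuse a fixed stored pose;
            # default: shift the position and jitter the rotation.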
            if self.opts.image_type == "translation":
                position[0] = position[0] + self.rng.rand() * 0.2 - 0.1
            elif self.opts.image_type == "outpaint":
                rotation = quaternion.as_float_array(
                    jitter_quaternions(
                        quaternion.from_float_array(rand_rotation),
                        self.rng,
                        angle=10,
                    )
                ).tolist()
            elif self.opts.image_type == "fixedRT_baseline":
                rand_location = self.rand_location
                rotation = self.rand_rotation

            else:
                position[0] = position[0] + self.rng.rand() * 0.3 - 0.15
                rotation = quaternion.as_float_array(
                    jitter_quaternions(
                        quaternion.from_float_array(rand_rotation),
                        self.rng,
                        angle=10,
                    )
                ).tolist()

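            # Render RGB / depth (and semantics, when available) at the perturbed pose.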
            obs = self.env_sim.get_observations_at(
                position=position,
                rotation=rotation,
                keep_agent_at_new_pose=True,
            )

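            # Depth becomes a 1 x H x W tensor; RGB is scaled to [0, 1) and run
            # through self.transform.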
            depths += [torch.Tensor(obs["depth"][..., 0]).unsqueeze(0)]
            rgbs += [self.transform(obs["rgb"].astype(np.float32) / 256.0)]

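            # When the scene carries semantic annotations, map per-pixel instance
            # IDs to object category labels.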
            if "semantic" in obs.keys():
                instance_semantic = torch.Tensor(
                    obs["semantic"].astype(np.int32)
                ).unsqueeze(0)
                class_semantic = torch.zeros(instance_semantic.size()).long()

                id_to_label = {
                    int(obj.id.split("_")[-1]): obj.category.index()
                    for obj in self.env.sim.semantic_annotations().objects
                }

                for id_scene in np.unique(instance_semantic.numpy()):
                    class_semantic[instance_semantic == id_scene] = id_to_label[
                        id_scene
                    ]

                semantics += [class_semantic]

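            # Camera for this view: P / Pinv built from the depth sensor's pose,
            # together with the shared intrinsics K / Kinv.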
            agent_state = self.env_sim.get_agent_state().sensor_states["depth"]
            rotation = quaternion.as_rotation_matrix(agent_state.rotation)
            position = agent_state.position
            P, Pinv = get_camera_matrices(position=position, rotation=rotation)
            cameras += [{"P": P, "Pinv": Pinv, "K": self.K, "Kinv": self.invK}]

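        # Count this sample toward the periodic reset above.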
        self.num_samples += 1
        if len(semantics) > 0:
            return {
                "images": rgbs,
                "depths": depths,
                "cameras": cameras,
                "semantics": semantics,
            }

        return {"images": rgbs, "depths": depths, "cameras": cameras}
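

Note on the rotation encoding: rand_rotation is a quaternion written as a plain
coefficient list. Read in (x, y, z, w) order, which is how Habitat interprets the
4-element rotation list passed to get_observations_at, it is a pure rotation of
rand_angle about the vertical (+Y) axis. The sketch below is not part of the repo;
it is only a standalone check of that reading, using the numpy-quaternion package
the function above already relies on.

    # Standalone check (assumption: the coefficient list is ordered x, y, z, w).
    import numpy as np
    import quaternion  # numpy-quaternion

    angle = 0.7
    x, y, z, w = 0.0, np.sin(angle / 2), 0.0, np.cos(angle / 2)
    R = quaternion.as_rotation_matrix(np.quaternion(w, x, y, z))

    # Rotation by `angle` about +Y, written out directly.
    R_y = np.array(
        [
            [np.cos(angle), 0.0, np.sin(angle)],
            [0.0, 1.0, 0.0],
            [-np.sin(angle), 0.0, np.cos(angle)],
        ]
    )
    assert np.allclose(R, R_y)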