def get_obs()

in gym_genesis/tasks/cube_pick.py [0:0]


    def get_obs(self):
        """Assemble the observation dict for the current simulation state.

        The state vectors are built by concatenating along the last axis, so
        the method works both for batched ``(B, X)`` readings and for
        unbatched ``(X,)`` readings.

        Returns:
            dict with the following entries:

            * ``"agent_pos"``: float32 tensor ``(..., 9)`` = end-effector
              position (3) + end-effector quaternion (4) + DOF positions
              7:9 of the robot (2; presumably the two gripper fingers --
              confirm against the robot model).
            * ``"environment_state"``: float32 tensor ``(..., 11)`` = cube
              position (3) + cube quaternion (4) + ``eef_pos - cube_pos``
              (3) + its Euclidean norm (1). The last two are privileged
              features. Omitted when both ``self.enable_pixels`` and
              ``self.strip_environment_state`` are truthy.
            * ``"pixels"``: only when ``self.enable_pixels`` is truthy.
              In ``"per_env"`` mode an array ``(B, H, W, 3)`` stacked from
              one render per environment; in ``"global"`` mode the single
              rendered image ``(H, W, 3)``.

        Raises:
            ValueError: if pixels are enabled and
                ``self.camera_capture_mode`` is neither ``"per_env"`` nor
                ``"global"``.
        """
        # === agent (robot) state features ===
        eef_pos = self.eef.get_pos()  # (..., 3)
        eef_rot = self.eef.get_quat()  # (..., 4)
        gripper = self.franka.get_dofs_position()[..., 7:9]  # (..., 2)

        # === environment (object) state features ===
        cube_pos = self.cube.get_pos()  # (..., 3)
        cube_rot = self.cube.get_quat()  # (..., 4)
        diff = eef_pos - cube_pos  # (..., 3) (privileged)
        # Reduce over the feature axis; dim=-1 equals dim=1 for (B, 3) input
        # and additionally supports an unbatched (3,) input.
        dist = torch.norm(diff, dim=-1, keepdim=True)  # (..., 1) (privileged)

        # compose observation dict
        agent_pos = torch.cat([eef_pos, eef_rot, gripper], dim=-1).float()  # (..., 9)
        environment_state = torch.cat(
            [cube_pos, cube_rot, diff, dist], dim=-1
        ).float()  # (..., 11)

        # TODO (jadechoghari): it's hacky but keep it for the sake of saving time
        # The environment state is only dropped in pixel mode. Plain
        # truthiness (instead of ``is True``) so that flags such as
        # ``np.bool_(True)`` coming from configs are honoured as well.
        strip_state = self.enable_pixels and self.strip_environment_state

        obs = {"agent_pos": agent_pos}
        if not strip_state:
            obs["environment_state"] = environment_state

        if not self.enable_pixels:
            return obs

        if self.camera_capture_mode == "per_env":
            # Capture a separate image for each environment by moving the
            # single camera to the same relative viewpoint in every env.
            # The camera is left at the last environment's pose afterwards.
            cam_offset = np.array([3.5, 0.0, 2.5])
            lookat_offset = np.array([0, 0, 0.5])
            batch_imgs = []
            for i in range(self.num_envs):
                env_origin = self.scene.envs_offset[i]
                self.cam.set_pose(
                    pos=env_origin + cam_offset,
                    lookat=env_origin + lookat_offset,
                )
                # render() returns a sequence; its first item is the image
                batch_imgs.append(self.cam.render()[0])
            pixels = np.stack(batch_imgs, axis=0)  # shape: (B, H, W, 3)
            assert pixels.ndim == 4, f"pixels shape {pixels.shape} is not 4D (B, H, W, 3)"
        elif self.camera_capture_mode == "global":
            # Capture a single global/overview image
            pixels = self.cam.render()[0]  # shape: (H, W, 3)
            assert pixels.ndim == 3, f"pixels shape {pixels.shape} is not 3D (H, W, 3)"
        else:
            raise ValueError(f"Unknown camera_capture_mode: {self.camera_capture_mode}")

        obs["pixels"] = pixels
        return obs