def observations_to_image()

in ss_baselines/common/utils.py


from typing import Dict

import cv2
import numpy as np
from habitat.utils.visualizations import maps

# Note: draw_collision, used below, is defined alongside this function in the
# same module.


def observations_to_image(observation: Dict, info: Dict, pred=None) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().
        pred: optional (category_index, goal_offset) tuple to overlay on
            the top-down map; goal_offset is a 2-D displacement in the
            agent's frame.

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()

        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()

        depth_map = depth_map.astype(np.uint8)
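        # replicate the single depth channel into 3 channels to match the RGB view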
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert (
        len(egocentric_view) > 0
    ), "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view

    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )
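        # overlay the predicted goal on the map (its category label is drawn further down)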
        if pred is not None:
            from habitat.utils.geometry_utils import quaternion_rotate_vector

            # current_position = sim.get_agent_state().position
            # agent_state = sim.get_agent_state()
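            # pred[1] is a 2-D goal offset in the agent's frame; lift it into
            # habitat's camera convention (x right, y up, -z forward), then
            # rotate it into the world frame with the agent's rotation quaternion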
            source_rotation = info["top_down_map"]["agent_rotation"]

            rounded_pred = np.round(pred[1])
            direction_vector_agent = np.array([rounded_pred[1], 0, -rounded_pred[0]])
            direction_vector = quaternion_rotate_vector(source_rotation, direction_vector_agent)
            # pred_goal_location = source_position + direction_vector.astype(np.float32)

            # meters per cell of the legacy 10000 x 10000 habitat top-down grid
            grid_size = (
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            )
            delta_x = int(-direction_vector[0] / grid_size[0])
            delta_y = int(direction_vector[2] / grid_size[1])

            # predicted goal cell, clamped so it stays inside the map bounds
            x = np.clip(map_agent_pos[0] + delta_x, a_min=0, a_max=top_down_map.shape[0] - 1)
            y = np.clip(map_agent_pos[1] + delta_y, a_min=0, a_max=top_down_map.shape[1] - 1)
            point_padding = 12
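            # paint a filled cyan disc of radius point_padding at the predicted goal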
            for m in range(x - point_padding, x + point_padding + 1):
                for n in range(y - point_padding, y + point_padding + 1):
                    if (np.linalg.norm(np.array([m - x, n - y])) <= point_padding
                            and 0 <= m < top_down_map.shape[0]
                            and 0 <= n < top_down_map.shape[1]):
                        top_down_map[m, n] = (0, 255, 255)
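            # sanity check: a near-zero prediction must land on the agent's own cell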
            if np.linalg.norm(rounded_pred) < 1:
                assert delta_x == 0 and delta_y == 0

        # rotate tall maps to landscape so they tile cleanly next to the egocentric view
        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        if pred is None:
            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
        else:
            # draw label
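            # category names in index order, used to decode the predicted class id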
            CATEGORY_INDEX_MAPPING = {
                'chair': 0,
                'table': 1,
                'picture': 2,
                'cabinet': 3,
                'cushion': 4,
                'sofa': 5,
                'bed': 6,
                'chest_of_drawers': 7,
                'plant': 8,
                'sink': 9,
                'toilet': 10,
                'stool': 11,
                'towel': 12,
                'tv_monitor': 13,
                'shower': 14,
                'bathtub': 15,
                'counter': 16,
                'fireplace': 17,
                'gym_equipment': 18,
                'seating': 19,
                'clothes': 20
            }
            index2label = {v: k for k, v in CATEGORY_INDEX_MAPPING.items()}
            pred_label = index2label[pred[0]]
            text_height = int(observation_size * 0.1)

            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size - text_height
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )

            # prepend a white banner above the map (float32 to match the resized map's dtype)
            top_down_map = np.concatenate(
                [np.ones([text_height, top_down_map.shape[1], 3], dtype=np.float32) * 255, top_down_map], axis=0)
            top_down_map = cv2.putText(top_down_map, 'C_t: ' + pred_label.replace('_', ' '), (10, text_height - 10),
                                       cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 2, cv2.LINE_AA)

        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame
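
Below is a minimal usage sketch. `env` and `agent` are hypothetical stand-ins for a habitat-style environment (whose step() returns the observation and info dicts this function expects) and a trained policy; only observations_to_image itself comes from this module. The frames it returns are typically written out with a video helper such as habitat's images_to_video.

# Hypothetical rollout loop: `env` and `agent` are placeholders, not part of
# this module. Each step's observation/info pair becomes one video frame.
frames = []
observation = env.reset()
done = False
while not done:
    action = agent.act(observation)  # hypothetical policy call
    observation, reward, done, info = env.step(action)
    # pass pred=(category_index, goal_offset) here to also visualize a
    # goal prediction on the top-down map
    frames.append(observations_to_image(observation, info))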