in ss_baselines/common/utils.py [0:0]
def observations_to_image(observation: Dict, info: Dict, pred=None) -> np.ndarray:
    r"""Generate image of single frame from observation and info
    returned from a single environment step().

    Args:
        observation: observation returned from an environment step().
            May contain "rgb" and/or "depth" entries (numpy arrays or
            torch tensors — tensors are moved to CPU and converted).
        info: info returned from an environment step(). May contain
            "collisions" and "top_down_map" entries.
        pred: optional prediction to visualize. When given, pred[0] is a
            category index (looked up in the hard-coded mapping below) and
            pred[1] is an egocentric (forward, right) offset that is drawn
            as a goal marker on the top-down map.
            # NOTE(review): pred's exact structure is inferred from use here — confirm at call site.

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        # torch tensors must be moved to CPU numpy before drawing
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()
        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        # assumes depth is normalized to [0, 1] — scaled to [0, 255] for display
        # NOTE(review): normalization assumption not visible here; confirm sensor config.
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()
        depth_map = depth_map.astype(np.uint8)
        # replicate the single depth channel into 3 channels so it can be
        # concatenated side-by-side with the 3-channel RGB view
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert (
        len(egocentric_view) > 0
    ), "Expected at least one visual sensor enabled."
    # tile the enabled sensor views horizontally
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"]
        )
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )
        if pred is not None:
            # local import keeps the habitat geometry dependency off the
            # module's import path when predictions are not visualized
            from habitat.utils.geometry_utils import quaternion_rotate_vector

            # rotate the predicted egocentric offset into the world frame;
            # (forward, right) -> habitat's (x, y, z) with -z forward
            # NOTE(review): axis convention inferred from the component order — confirm.
            source_rotation = info["top_down_map"]["agent_rotation"]
            rounded_pred = np.round(pred[1])
            direction_vector_agent = np.array([rounded_pred[1], 0, -rounded_pred[0]])
            direction_vector = quaternion_rotate_vector(source_rotation, direction_vector_agent)

            # meters-per-cell of the top-down grid (10000 x 10000 cells
            # spanning the map coordinate range)
            grid_size = (
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            )
            # convert world-frame offset (meters) to map-cell deltas;
            # map row decreases with +x, column increases with +z
            # NOTE(review): sign convention inferred — verify against maps.to_grid.
            delta_x = int(-direction_vector[0] / grid_size[0])
            delta_y = int(direction_vector[2] / grid_size[1])
            # clamp marker center into the map; any cells that still fall
            # outside are skipped by the bounds check in the loop below
            x = np.clip(map_agent_pos[0] + delta_x, a_min=0, a_max=top_down_map.shape[0])
            y = np.clip(map_agent_pos[1] + delta_y, a_min=0, a_max=top_down_map.shape[1])
            point_padding = 12
            # paint a filled cyan disc of radius point_padding at the
            # predicted goal location
            for m in range(x - point_padding, x + point_padding + 1):
                for n in range(y - point_padding, y + point_padding + 1):
                    if np.linalg.norm(np.array([m - x, n - y])) <= point_padding and \
                            0 <= m < top_down_map.shape[0] and 0 <= n < top_down_map.shape[1]:
                        top_down_map[m, n] = (0, 255, 255)
            # sanity check: a (near-)zero prediction must map to zero cell offset
            if np.linalg.norm(rounded_pred) < 1:
                assert delta_x == 0 and delta_y == 0

        # rotate portrait maps to landscape so the horizontal strip stays compact
        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        if pred is None:
            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
        else:
            # draw label
            # category-index mapping for the predicted object class
            # NOTE(review): mirrors the Matterport3D object-goal categories;
            # confirm it matches the dataset config this is used with.
            CATEGORY_INDEX_MAPPING = {
                'chair': 0,
                'table': 1,
                'picture': 2,
                'cabinet': 3,
                'cushion': 4,
                'sofa': 5,
                'bed': 6,
                'chest_of_drawers': 7,
                'plant': 8,
                'sink': 9,
                'toilet': 10,
                'stool': 11,
                'towel': 12,
                'tv_monitor': 13,
                'shower': 14,
                'bathtub': 15,
                'counter': 16,
                'fireplace': 17,
                'gym_equipment': 18,
                'seating': 19,
                'clothes': 20
            }
            index2label = {v: k for k, v in CATEGORY_INDEX_MAPPING.items()}
            pred_label = index2label[pred[0]]
            # reserve a banner strip (10% of frame height) above the map for text
            text_height = int(observation_size * 0.1)
            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size - text_height
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
            # stack a white banner on top of the resized map, then draw the
            # predicted category label into it
            top_down_map = np.concatenate(
                [np.ones([text_height, top_down_map.shape[1], 3], dtype=np.int32) * 255, top_down_map], axis=0)
            top_down_map = cv2.putText(top_down_map, 'C_t: ' + pred_label.replace('_', ' '), (10, text_height - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 1.4, (0, 0, 0), 2, cv2.LINE_AA)
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame