metropolis/utils/geometry_utils.py (144 lines of code) (raw):
# Copyright (c) Facebook, Inc. and its affiliates.
# Original copyright notice:
# nuScenes dev-kit.
# Code written by Oscar Beijbom and Alex Lang, 2018.
from enum import IntEnum
from typing import Tuple, TYPE_CHECKING, List
import numpy as np
from pyquaternion import Quaternion
# The following is a trick to solve a cyclic import loop
if TYPE_CHECKING:
from .data_classes import Box # noqa F409
class BoxVisibility(IntEnum):
"""Enumerates the various level of box visibility in an image"""
ALL = 0 # Requires all corners are inside the image.
ANY = 1 # Requires at least one corner visible in the image.
NONE = (
2 # Requires no corners to be inside, i.e. box can be fully outside the image.
)
def view_points(points: np.ndarray, view: np.ndarray, normalize: bool) -> np.ndarray:
"""
This is a helper class that maps 3d points to a 2d plane. It can be used to implement both perspective and
orthographic projections. It first applies the dot product between the points and the view. By convention,
the view should be such that the data is projected onto the first 2 axis. It then optionally applies a
normalization along the third dimension.
For a perspective projection the view should be a 3x3 camera matrix, and normalize=True
For an orthographic projection with translation the view is a 3x4 matrix and normalize=False
For an orthographic projection without translation the view is a 3x3 matrix (optionally 3x4 with last columns
all zeros) and normalize=False
Args:
points: Matrix of points, where each point (x, y, z) is along each column.
view: Defines an arbitrary projection (n <= 4). The projection should be
such that the corners are projected onto the first 2 axis.
normalize: Whether to normalize the remaining coordinate (along the third axis).
Returns:
Mapped point. If normalize=False, the third coordinate is the height.
"""
assert view.shape[0] <= 4
assert view.shape[1] <= 4
assert points.shape[0] == 3
viewpad = np.eye(4)
viewpad[: view.shape[0], : view.shape[1]] = view
nbr_points = points.shape[1]
# Do operation in homogenous coordinates.
points = np.concatenate((points, np.ones((1, nbr_points))))
points = np.dot(viewpad, points)
points = points[:3, :]
if normalize:
points = points / points[2:3, :].repeat(3, 0).reshape(3, nbr_points)
return points
def view_points_eq(points: np.ndarray, width: int, height: int) -> np.ndarray:
"""Project 3D points to equirectangular image
Args:
points: 3xN array of points.
width: Image width.
height: Image height
Returns:
A 2xN array of projected points in pixel coordinates.
"""
# Polar coordinates
u = np.arctan2(points[0, :], points[2, :])
v = np.arctan2(points[1, :], np.sqrt(points[0, :] ** 2 + points[2, :] ** 2))
# Map to image
u_pix = (u / (2 * np.pi) + 0.5) * width
v_pix = (v / np.pi + 0.5) * height
return np.stack([u_pix, v_pix], axis=0)
def inverse_map_eq(
points: np.ndarray,
transform: np.ndarray,
intrinsics: np.ndarray,
eq_size: Tuple[int, int],
) -> np.ndarray:
"""Inverse map function to warp from perspective to equirect. using scikit-image
Args:
points: Points to inverse map, given as an Nx2 array of (x, y) pixel coordinates
in the perspective image.
transform: 3D transformation from the perspective image frame to the equirectangular
image frame, give as a 4x4 matrix.
intrinsics: Intrinsic parameters of the perspective camera, given as a 3x3 matrix
eq_size: Size of the equirectangular image, given as (height, width)
Returns:
Inverse mapped points, given as an Nx2 array of (x, y) pixel coordinates in
the equirectangular image.
"""
# Pixels to image plane
points = np.concatenate(
[points.T, np.ones((1, points.shape[0]), dtype=points.dtype)],
axis=0,
)
points = np.linalg.solve(intrinsics, points)
# Perspective camera frame to equirectangular camera frame
points = np.concatenate(
[points, np.ones((1, points.shape[1]), dtype=points.dtype)], axis=0
)
points = np.dot(transform, points)
points = points[:3, :]
# Equirectangular projection
points = view_points_eq(points, eq_size[0], eq_size[1])
return points.T
def box_in_image(
box: "Box",
intrinsic: np.ndarray,
imsize: Tuple[int, int],
vis_level: int = BoxVisibility.ANY,
) -> bool:
"""Check if a box is visible inside an image without accounting for occlusions.
Args:
box: The box to be checked.
intrinsic: Intrinsic camera matrix.
imsize: (width, height).
vis_level: One of the enumerations of `BoxVisibility`.
Returns:
True if visibility condition is satisfied.
"""
corners_3d = box.corners()
corners_img = view_points(corners_3d, intrinsic, normalize=True)[:2, :]
visible = np.logical_and(corners_img[0, :] > 0, corners_img[0, :] < imsize[0])
visible = np.logical_and(visible, corners_img[1, :] < imsize[1])
visible = np.logical_and(visible, corners_img[1, :] > 0)
visible = np.logical_and(visible, corners_3d[2, :] > 1)
in_front = (
corners_3d[2, :] > 0.1
) # True if a corner is at least 0.1 meter in front of the camera.
if vis_level == BoxVisibility.ALL:
return all(visible) and all(in_front)
elif vis_level == BoxVisibility.ANY:
return any(visible) and all(in_front)
elif vis_level == BoxVisibility.NONE:
return True
else:
raise ValueError(f"vis_level: {vis_level} not valid")
DEFAULT_TRANSLATION = np.array([0, 0, 0])
DEFAULT_ROTATION = Quaternion([1, 0, 0, 0])
def transform_matrix(
translation: np.ndarray = DEFAULT_TRANSLATION,
rotation: Quaternion = DEFAULT_ROTATION,
inverse: bool = False,
) -> np.ndarray:
"""Convert pose to transformation matrix.
Args:
translation: Translation in x, y, z.
rotation: Rotation in quaternions (w ri rj rk).
inverse: Whether to compute inverse transform matrix.
Returns:
Transformation matrix.
"""
tm = np.eye(4)
if inverse:
rot_inv = rotation.rotation_matrix.T
trans = np.transpose(-np.array(translation))
tm[:3, :3] = rot_inv
tm[:3, 3] = rot_inv.dot(trans)
else:
tm[:3, :3] = rotation.rotation_matrix
tm[:3, 3] = np.transpose(np.array(translation))
return tm
def points_in_box(
box: "Box", points: np.ndarray, wlh_factor: float = 1.0
) -> np.ndarray:
"""Checks whether points are inside the box.
Picks one corner as reference (p1) and computes the vector to a target point (v).
Then for each of the 3 axes, project v onto the axis and compare the length.
Inspired by: https://math.stackexchange.com/a/1552579
Args:
box: A 3D bounding box.
points: Points to check.
wlh_factor: Inflates or deflates the box.
Returns:
A boolean array with the check result for each point.
"""
corners = box.corners(wlh_factor=wlh_factor)
p1 = corners[:, 0]
p_x = corners[:, 4]
p_y = corners[:, 1]
p_z = corners[:, 3]
i = p_x - p1
j = p_y - p1
k = p_z - p1
v = points - p1.reshape((-1, 1))
iv = np.dot(i, v)
jv = np.dot(j, v)
kv = np.dot(k, v)
mask_x = np.logical_and(0 <= iv, iv <= np.dot(i, i))
mask_y = np.logical_and(0 <= jv, jv <= np.dot(j, j))
mask_z = np.logical_and(0 <= kv, kv <= np.dot(k, k))
mask = np.logical_and(np.logical_and(mask_x, mask_y), mask_z)
return mask
def split_poly_eq(poly: np.ndarray, width: int) -> List[np.ndarray]:
"""Split a polygon into multiple segments to render it properly on an eq image
When projecting lines (e.g. 3D bounding box edges) to an equirectangular image,
we obtain curves that can cross over from the right to the left edge of the
image (or vice vers), potentially multiple times. Plotting these naively produces
large artifacts. To avoid this, this function splits the projection (given as
a polygon) into multiple segments which, when plotted independently, produce
the correct visualization.
Args:
poly: A polygon, i.e. a sequence of 2D points given as a 2 x N array.
width: Width of the equirectangular image in pixels.
Returns:
A list of polygon segments to be plotted separately.
"""
segments = []
curr_segment = []
for i in range(0, poly.shape[1] - 1):
x1 = poly[0, i]
x2 = poly[0, i + 1]
y1 = poly[1, i]
y2 = poly[1, i + 1]
if x2 > x1:
d_dir = x2 - x1
d_crs = x1 + width - x2
if d_crs < d_dir:
# Crossing is better, we need to split
y_interp = (width - x2) * (y1 - y2) / (x1 + width - x2) + y2
# Finish the current segment
curr_segment.append((x1, y1))
curr_segment.append((0, y_interp))
segments.append(curr_segment)
# Start a new one
curr_segment = [(width, y_interp)]
else:
# No need to split
curr_segment.append((x1, y1))
elif x1 > x2:
d_dir = x1 - x2
d_crs = x2 + width - x1
if d_crs < d_dir:
# Crossing is better, we need to split
y_interp = (width - x1) * (y2 - y1) / (x2 + width - x1) + y1
# Finish the current segment
curr_segment.append((x1, y1))
curr_segment.append((width, y_interp))
segments.append(curr_segment)
# Start a new one
curr_segment = [(0, y_interp)]
else:
# No need to split
curr_segment.append((x1, y1))
else:
# No need to split
curr_segment.append((x1, y1))
curr_segment.append((poly[0, -1], poly[1, -1]))
segments.append(curr_segment)
# TODO: covert segments to numpy
return [np.array(segment).T for segment in segments]