scripts/util/camera.py (286 lines of code) (raw):

#!/usr/bin/env python3 # Copyright 2004-present Facebook. All Rights Reserved. # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. import json import math import numpy as np import numpy.polynomial.polynomial as poly from logger import Logger from matrix_operations import is_unitary, normalize_vector from ray import Ray from scipy import linalg from scipy.spatial.transform import Rotation as R K_NEAR_INFINITY = 1e4 INFINITY = 1e50 log = Logger(__name__) log.logger.propagate = False class Camera: """The Camera class contains all attributes of a camera such as: type of lens, rotations, resolution, principal, focal, etc. Attributes: id (string) : name of camera type (string) : type of camera lens i.e. ftheta, rectilinear, orthographic, equiosolid group (string) : name of group to which the camera belongs position (numpy 1x3 array) : position of the camera relative to center of the rig (meters) rotation (numpy 3x3 "matrix"/2D array) : matrix that represents the rotation of the camera right up backward resolution (numpy 1x2 array) : resolution of camera in pixels focal (numpy 1x2 array) : number of pixels per radian at principal point principal (numpy 1x2 dim array) : pixel coordinate of the optical axis distortion (numpy 1x3 array) : array of coefficients for the distortion polynomial cos_fov (float) : cosine value of the fov in radians """ def __init__(self, json_file=None, json_string=None): if json_file: self.load_from_json_file(json_file) elif json_string: self.load_from_json_string(json_string) def load_from_json_file(self, json_file): with open(json_file) as f: json_string = f.read() self.load_from_json_string(json_string) def load_from_json_string(self, json_string): json_obj = json.loads(json_string) log.check_ge(float(json_obj["version"]), 1.0) self.version = float(json_obj["version"]) self.id = str(json_obj["id"]) self.type = str(json_obj["type"]) if "group" in json_obj: self.group = str(json_obj["group"]) self.position = deserialize_list(json_obj["origin"], 3) self.set_rotation(json_obj["forward"], json_obj["up"], json_obj["right"]) self.resolution = deserialize_list(json_obj["resolution"], 2) self.focal = deserialize_list(json_obj["focal"], 2) if "principal" in json_obj: self.principal = deserialize_list(json_obj["principal"], 2) else: self.principal = self.resolution / 2 if "distortion" in json_obj: self.set_distortion(deserialize_list(json_obj["distortion"])) else: self.set_default_distortion() if "fov" in json_obj: self.set_fov(float(json_obj["fov"])) else: self.set_default_fov() def serialize(self): json_obj = { "version": self.version, "id": self.id, "type": self.type, "origin": self.position.tolist(), "forward": self.forward().tolist(), "up": self.up().tolist(), "right": self.right().tolist(), "resolution": self.resolution.tolist(), "focal": self.focal.tolist(), } if hasattr(self, "group"): json_obj["group"] = self.group if not np.array_equal(self.principal, self.resolution / 2): json_obj["principal"] = self.principal.tolist() if np.any(self.__distortion): json_obj["distortion"] = self.__distortion.tolist() if not self.is_default_fov(): json_obj["fov"] = self.get_fov() return json_obj """ Orientation """ def forward(self): return np.negative(self.rotation[2]) def backward(self): return self.rotation[2] def up(self): return self.rotation[1] def right(self): return self.rotation[0] def get_rotation(self): return self.rotation def set_rotation(self, forward, up, right): # TODO: might have to convert to angle axis and back to rotation matrix if there is # a variance from floating point error log.check_lt( np.dot(np.cross(right, up), forward), 0, "rotation must be right-handed" ) self.rotation = np.array([right, up, np.negative(forward)]) tol = 0.001 log.check(is_unitary(self.rotation, tol)) def get_rotation_vector(self): r = R.from_dcm(self.get_rotation()) return r.as_rotvec() def set_rotation_vector(self, rotvec): r = R.from_rotvec(rotvec) self.rotation = r.as_dcm() def get_scalar_focal(self): log.check_eq(self.focal[0], -self.focal[1], "pixels must be square") return self.focal[0] def set_scalar_focal(self, scalar): self.focal = np.asarray([scalar, -scalar]) """ FOV """ def get_fov(self): return math.acos(self.cos_fov) def set_fov(self, fov): self.cos_fov = math.cos(fov) log.check_ge(self.cos_fov, get_default_cos_fov(self.type)) def set_default_fov(self): self.cos_fov = get_default_cos_fov(self.type) def is_default_fov(self): return self.cos_fov == get_default_cos_fov(self.type) def is_behind(self, point): return np.dot(self.backward(), point - self.position) >= 0 def is_outside_fov(self, point): if self.cos_fov == -1: return False if self.cos_fov == 0: return self.is_behind(point) v = point - self.position dotprod = np.dot(self.forward(), v) v_norm_squared = math.pow(linalg.norm(v), 2) return ( dotprod * abs(dotprod) <= self.cos_fov * abs(self.cos_fov) * v_norm_squared ) def is_outside_image_circle(self, pixel): if self.is_default_fov(): return False # find an edge point by projecting a point from the FOV cone sin_fov = math.sqrt(1 - self.cos_fov * self.cos_fov) edge = self.camera_to_sensor(np.array([0, sin_fov, -self.cos_fov])) # pixel is outside FOV if its farther from the principal than the edge point sensor = (pixel - self.principal) / self.focal return math.pow(linalg.norm(sensor), 2) >= linalg.norm(edge) def is_outside_sensor(self, pixel): pixX = pixel[0] pixY = pixel[1] resX = self.resolution[0] resY = self.resolution[1] return not (0 <= pixX and pixX < resX and 0 <= pixY and pixY < resY) def sees(self, point): if self.is_outside_fov(point): return (False, None) pixel = self.world_to_pixel(point) return (not self.is_outside_sensor(pixel), pixel) def overlap(self, camera): """estimate the fraction of the frame that is covered by the other camera""" # just brute force probeCount x probeCount points kProbeCount = 10 inside = 0 for y in range(0, kProbeCount): for x in range(0, kProbeCount): p = np.array([x, y]) * self.resolution / (kProbeCount - 1) if ( not self.is_outside_image_circle(p) and camera.sees(self.point_near_infinity(p))[0] ): inside += 1 return inside / math.pow(kProbeCount, 2) """ Rescale """ def rescale(self, resolution): self.principal = self.principal * (np.asarray(resolution) / self.resolution) self.focal = self.focal * (np.asarray(resolution) / self.resolution) self.resolution = np.asarray(resolution) def normalize(self): self.principal = self.principal / self.resolution self.focal = self.focal / self.resolution self.resolution = np.ones(2) def is_normalized(self): return np.array_equal(self.resolution, np.ones(2)) """ Distortion """ def get_distortion(self): return self.__distortion def get_distortion_max(self): return self.__distortion_max def set_default_distortion(self): self.__distortion = np.zeros(3) self.__distortion_max = INFINITY def set_distortion(self, distortion): count = distortion.size while distortion[count - 1] == 0: count -= 1 if count == 0: return self.set_default_distortion() # distortion polynomial is x + d[0] * x^3 + d[1] * x^5 ... # derivative is: 1 + d[0] * 3 x^2 + d[1] * 5 x^4 ... # using y = x^2: 1 + d[0] * 3 y + d[1] * 5 y^2 ... derivative = np.empty(count + 1) derivative[0] = 1 for i in range(0, count): derivative[i + 1] = distortion[i] * (2 * i + 3) # find smallest rea; root greater than zero in the derivative roots = poly.polyroots(derivative) y = INFINITY for root in roots: if np.isreal(root) and 0 < root and root < y: y = np.real(root) # "convert" to real due to casting warnings self.__distortion = distortion self.__distortion_max = math.sqrt(y) # distortion is modeled in pixel space as: # distort(r) = r + d0 * r^3 + d1 * r^5 def distort(self, r): r = min(r, self.__distortion_max) return self.__distort_factor(r * r) * r def __distort_factor(self, r_squared): index = self.get_distortion().size - 1 result = self.get_distortion()[index] while index > 0: index -= 1 result = self.get_distortion()[index] + r_squared * result return 1 + r_squared * result def undistort(self, y): if np.count_nonzero(self.get_distortion()) == 0: return y if y >= self.distort(self.__distortion_max): return self.__distortion_max smidgen = 1.0 / K_NEAR_INFINITY k_max_steps = 10 x0 = 0 y0 = 0 dy0 = 1 for _step in range(0, k_max_steps): x1 = (y - y0) / dy0 + x0 y1 = self.distort(x1) if abs(y1 - y) < smidgen: return x1 dy1 = (self.distort(x1 + smidgen) - y1) / smidgen log.check_ge(dy1, 0, "went past a maximum") x0 = x1 y0 = y1 dy0 = dy1 return x0 # should not happen """ projection """ def world_to_pixel(self, point): """Compute pixel coordinates from point in rig space""" # transform point in rig space to camera space camera_point = np.matmul(self.rotation, (np.asarray(point) - self.position)) # transform point in camera space to distorted sensor coordinates sensor_point = self.camera_to_sensor(camera_point) # transform distorted sensor coordinates to pixel coordinates return sensor_point * self.focal + self.principal def pixel_to_world(self, pixel, depth=None): """Compute rig coordinates from pixel and return as a parameterized line""" # transform from pixel to distorted sensor coordinates sensor_point = (pixel - self.principal) / self.focal # transform from distorted sensor coordinates to unit camera vector unit = self.sensor_to_camera(sensor_point) # transform from camera space to rig space ray = Ray(self.position, np.matmul(self.rotation.T, unit)) if depth: return ray.point_at(depth) else: return ray def camera_to_sensor(self, camera_point): # FTHETA: r = theta # RECTILINEAR: r = tan(theta) # EQUISOLID: r = 2 sin(theta / 2) # ORTHOGRAPHIC: r = sin(theta) # see https://wiki.panotools.org/Fisheye_Projection if self.type == "FTHETA": # r = theta <=> # r = atan2(|xy|, -z) xy = linalg.norm(camera_point[:2]) unit = normalize_vector(camera_point[:2]) r = math.atan2(xy, -camera_point[2]) return self.distort(r) * unit elif self.type == "RECTILINEAR": # r = tan(theta) <=> # r = |xy| / -z <=> # pre-distortion result is xy / -z xy = linalg.norm(camera_point[:2]) unit = normalize_vector(camera_point[:2]) if -camera_point[2] <= 0: # outside FOV r = math.tan(math.pi / 2) else: r = xy / -camera_point[2] return self.distort(r) * unit elif self.type == "EQUISOLID": # r = 2 sin(theta / 2) <=> # using sin(theta / 2) = sqrt((1 - cos(theta)) / 2) # r = 2 sqrt((1 + z / |xyz|) / 2) xy = linalg.norm(camera_point[:2]) unit = normalize_vector(camera_point[:2]) r = 2 * math.sqrt((1 + camera_point[2] / linalg.norm(camera_point)) / 2) return self.distort(r) * unit elif self.type == "ORTHOGRAPHIC": # r = sin(theta) <=> # r = |xy| / |xyz| <=> # pre-distortion result is xy / |xyz| xy = linalg.norm(camera_point[:2]) unit = normalize_vector(camera_point[:2]) if -camera_point[2] <= 0: # outside FOV r = math.sin(math.pi / 2) else: r = linalg.norm(camera_point[:2] / linalg.norm(camera_point)) return self.distort(r) * unit else: log.fatal(f"unexpected type {self.type}") def sensor_to_camera(self, sensor_point): norm = linalg.norm(sensor_point) norm_squared = math.pow(norm, 2) if norm_squared == 0: return np.array([0, 0, -1]) r = self.undistort(norm) if self.type == "FTHETA": theta = r elif self.type == "RECTILINEAR": theta = math.atan(r) elif self.type == "EQUISOLID": # arcsin function is undefined outside the interval [-1, 1] theta = 2 * math.asin(r / 2) if r <= 2 else math.pi elif self.type == "ORTHOGRAPHIC": theta = math.asin(r) if r <= 1 else math.pi / 2 else: log.fatal(f"unexpected type {self.type}") unit = np.asarray(math.sin(theta) / norm * sensor_point) unit = np.append(unit, -math.cos(theta)) return unit def point_near_infinity(self, pixel): return self.pixel_to_world(pixel, K_NEAR_INFINITY) def get_default_cos_fov(type): if type == "RECTILINEAR" or type == "ORTHOGRAPHIC": return 0 # hemisphere elif type == "FTHETA" or type == "EQUISOLID": return -1 # sphere else: log.fatal(f"unexpected type {type}") def deserialize_list(list, length=None): if length is not None: log.check_eq(len(list), length, "list is not the expected length") return np.asarray(list)