opensfm/dense.py (353 lines of code) (raw):
import logging
import typing as t
import cv2
import numpy as np
from opensfm import io
from opensfm import log
from opensfm import pydense
from opensfm import pymap
from opensfm import tracking
from opensfm import types
from opensfm.context import parallel_map
from opensfm.dataset import UndistortedDataSet
logger = logging.getLogger(__name__)
def compute_depthmaps(
data: UndistortedDataSet,
graph: pymap.TracksManager,
reconstruction: types.Reconstruction,
):
"""Compute and refine depthmaps for all shots.
Args:
data: an UndistortedDataset
graph: the tracks graph
reconstruction: the undistorted reconstruction
"""
logger.info("Computing neighbors")
config = data.config
processes = config["processes"]
num_neighbors = config["depthmap_num_neighbors"]
neighbors = {}
common_tracks = common_tracks_double_dict(graph)
for shot in reconstruction.shots.values():
neighbors[shot.id] = find_neighboring_images(
shot, common_tracks, reconstruction, num_neighbors
)
arguments = []
for shot in reconstruction.shots.values():
if len(neighbors[shot.id]) <= 1:
continue
mind, maxd = compute_depth_range(graph, reconstruction, shot, config)
arguments.append((data, neighbors[shot.id], mind, maxd, shot))
parallel_map(compute_depthmap_catched, arguments, processes)
arguments = []
for shot in reconstruction.shots.values():
if len(neighbors[shot.id]) <= 1:
continue
arguments.append((data, neighbors[shot.id], shot))
parallel_map(clean_depthmap_catched, arguments, processes)
arguments = []
for shot in reconstruction.shots.values():
if len(neighbors[shot.id]) <= 1:
continue
arguments.append((data, neighbors[shot.id], shot))
parallel_map(prune_depthmap_catched, arguments, processes)
point_cloud = merge_depthmaps(data, reconstruction)
data.save_point_cloud(*point_cloud, filename="merged.ply")
def compute_depthmap_catched(arguments):
try:
compute_depthmap(arguments)
except Exception as e:
logger.error("Exception on child. Arguments: {}".format(arguments))
logger.exception(e)
def clean_depthmap_catched(arguments):
try:
clean_depthmap(arguments)
except Exception as e:
logger.error("Exception on child. Arguments: {}".format(arguments))
logger.exception(e)
def prune_depthmap_catched(arguments):
try:
prune_depthmap(arguments)
except Exception as e:
logger.error("Exception on child. Arguments: {}".format(arguments))
logger.exception(e)
def compute_depthmap(arguments):
"""Compute depthmap for a single shot."""
log.setup()
data: UndistortedDataSet = arguments[0]
neighbors = arguments[1]
min_depth = arguments[2]
max_depth = arguments[3]
shot = arguments[4]
method = data.config["depthmap_method"]
if data.raw_depthmap_exists(shot.id):
logger.info("Using precomputed raw depthmap {}".format(shot.id))
return
logger.info("Computing depthmap for image {0} with {1}".format(shot.id, method))
de = pydense.DepthmapEstimator()
de.set_depth_range(min_depth, max_depth, 100)
de.set_patchmatch_iterations(data.config["depthmap_patchmatch_iterations"])
de.set_patch_size(data.config["depthmap_patch_size"])
de.set_min_patch_sd(data.config["depthmap_min_patch_sd"])
add_views_to_depth_estimator(data, neighbors, de)
if method == "BRUTE_FORCE":
depth, plane, score, nghbr = de.compute_brute_force()
elif method == "PATCH_MATCH":
depth, plane, score, nghbr = de.compute_patch_match()
elif method == "PATCH_MATCH_SAMPLE":
depth, plane, score, nghbr = de.compute_patch_match_sample()
else:
raise ValueError(
"Unknown depthmap method type "
"(must be BRUTE_FORCE, PATCH_MATCH or PATCH_MATCH_SAMPLE)"
)
good_score = score > data.config["depthmap_min_correlation_score"]
depth = depth * (depth < max_depth) * good_score
# Save and display results
neighbor_ids = [i.id for i in neighbors[1:]]
data.save_raw_depthmap(shot.id, depth, plane, score, nghbr, neighbor_ids)
if data.config["depthmap_save_debug_files"]:
image = data.load_undistorted_image(shot.id)
image = scale_down_image(image, depth.shape[1], depth.shape[0])
ply = depthmap_to_ply(shot, depth, image)
with io.open_wt(data.depthmap_file(shot.id, "raw.npz.ply")) as fout:
fout.write(ply)
if data.config.get("interactive"):
import matplotlib.pyplot as plt
plt.figure()
plt.suptitle("Shot: " + shot.id + ", neighbors: " + ", ".join(neighbor_ids))
plt.subplot(2, 3, 1)
plt.imshow(image)
plt.subplot(2, 3, 2)
plt.imshow(color_plane_normals(plane))
plt.subplot(2, 3, 3)
plt.imshow(depth)
plt.colorbar()
plt.subplot(2, 3, 4)
plt.imshow(score)
plt.colorbar()
plt.subplot(2, 3, 5)
plt.imshow(nghbr)
plt.colorbar()
plt.show()
def clean_depthmap(arguments):
"""Clean depthmap by checking consistency with neighbors."""
log.setup()
data: UndistortedDataSet = arguments[0]
neighbors = arguments[1]
shot = arguments[2]
if data.clean_depthmap_exists(shot.id):
logger.info("Using precomputed clean depthmap {}".format(shot.id))
return
logger.info("Cleaning depthmap for image {}".format(shot.id))
dc = pydense.DepthmapCleaner()
dc.set_same_depth_threshold(data.config["depthmap_same_depth_threshold"])
dc.set_min_consistent_views(data.config["depthmap_min_consistent_views"])
add_views_to_depth_cleaner(data, neighbors, dc)
depth = dc.clean()
# Save and display results
raw_depth, raw_plane, raw_score, raw_nghbr, nghbrs = data.load_raw_depthmap(shot.id)
data.save_clean_depthmap(shot.id, depth, raw_plane, raw_score)
if data.config["depthmap_save_debug_files"]:
image = data.load_undistorted_image(shot.id)
image = scale_down_image(image, depth.shape[1], depth.shape[0])
ply = depthmap_to_ply(shot, depth, image)
with io.open_wt(data.depthmap_file(shot.id, "clean.npz.ply")) as fout:
fout.write(ply)
if data.config.get("interactive"):
import matplotlib.pyplot as plt
plt.figure()
plt.suptitle("Shot: " + shot.id)
plt.subplot(2, 2, 1)
plt.imshow(raw_depth)
plt.colorbar()
plt.subplot(2, 2, 2)
plt.imshow(depth)
plt.colorbar()
plt.show()
def prune_depthmap(arguments):
"""Prune depthmap to remove redundant points."""
log.setup()
data: UndistortedDataSet = arguments[0]
neighbors = arguments[1]
shot = arguments[2]
if data.pruned_depthmap_exists(shot.id):
logger.info("Using precomputed pruned depthmap {}".format(shot.id))
return
logger.info("Pruning depthmap for image {}".format(shot.id))
dp = pydense.DepthmapPruner()
dp.set_same_depth_threshold(data.config["depthmap_same_depth_threshold"])
add_views_to_depth_pruner(data, neighbors, dp)
points, normals, colors, labels = dp.prune()
# Save and display results
data.save_pruned_depthmap(shot.id, points, normals, colors, labels)
if data.config["depthmap_save_debug_files"]:
data.save_point_cloud(points, normals, colors, labels, "pruned.npz.ply")
def aggregate_depthmaps(shot_ids, depthmap_provider):
"""Aggregate depthmaps by concatenation."""
points = []
normals = []
colors = []
labels = []
for shot_id in shot_ids:
p, n, c, l = depthmap_provider(shot_id)
points.append(p)
normals.append(n)
colors.append(c)
labels.append(l)
return (
np.concatenate(points),
np.concatenate(normals),
np.concatenate(colors),
np.concatenate(labels),
)
def merge_depthmaps(
data: UndistortedDataSet, reconstruction: types.Reconstruction
) -> t.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Merge depthmaps into a single point cloud."""
shot_ids = [s for s in reconstruction.shots if data.pruned_depthmap_exists(s)]
def depthmap_provider(shot_id):
return data.load_pruned_depthmap(shot_id)
return merge_depthmaps_from_provider(shot_ids, depthmap_provider)
def merge_depthmaps_from_provider(
shot_ids: t.Iterable[str], depthmap_provider: t.Callable
) -> t.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""Merge depthmaps into a single point cloud."""
logger.info("Merging depthmaps")
if not shot_ids:
logger.warning("Depthmaps contain no points. Try using more images.")
return np.array([]), np.array([]), np.array([]), np.array([])
return aggregate_depthmaps(shot_ids, depthmap_provider)
def add_views_to_depth_estimator(data: UndistortedDataSet, neighbors, de):
"""Add neighboring views to the DepthmapEstimator."""
num_neighbors = data.config["depthmap_num_matching_views"]
for shot in neighbors[: num_neighbors + 1]:
assert shot.camera.projection_type == "perspective"
color_image = data.load_undistorted_image(shot.id)
mask = load_combined_mask(data, shot)
gray_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2GRAY)
original_height, original_width = gray_image.shape
width = min(original_width, int(data.config["depthmap_resolution"]))
height = width * original_height // original_width
image = scale_down_image(gray_image, width, height)
mask = scale_image(mask, image.shape[1], image.shape[0], cv2.INTER_NEAREST)
K = shot.camera.get_K_in_pixel_coordinates(width, height)
R = shot.pose.get_rotation_matrix()
t = shot.pose.translation
de.add_view(K, R, t, image, mask)
def add_views_to_depth_cleaner(data: UndistortedDataSet, neighbors, dc):
for shot in neighbors:
if not data.raw_depthmap_exists(shot.id):
continue
depth, plane, score, nghbr, nghbrs = data.load_raw_depthmap(shot.id)
height, width = depth.shape
K = shot.camera.get_K_in_pixel_coordinates(width, height)
R = shot.pose.get_rotation_matrix()
t = shot.pose.translation
dc.add_view(K, R, t, depth)
def load_combined_mask(data: UndistortedDataSet, shot):
"""Load the undistorted mask.
If no mask exists return an array of ones.
"""
mask = data.load_undistorted_combined_mask(shot.id)
if mask is None:
size = int(shot.camera.height), int(shot.camera.width)
return np.ones(size, dtype=np.uint8)
else:
return mask
def load_segmentation_labels(data: UndistortedDataSet, shot):
"""Load the undistorted segmentation labels.
If no segmentation exists return an array of zeros.
"""
if data.undistorted_segmentation_exists(shot.id):
return data.load_undistorted_segmentation(shot.id)
else:
size = shot.camera.height, shot.camera.width
return np.zeros(size, dtype=np.uint8)
def add_views_to_depth_pruner(data: UndistortedDataSet, neighbors, dp):
for shot in neighbors:
if not data.clean_depthmap_exists(shot.id):
continue
depth, plane, score = data.load_clean_depthmap(shot.id)
height, width = depth.shape
color_image = data.load_undistorted_image(shot.id)
labels = load_segmentation_labels(data, shot)
height, width = depth.shape
image = scale_down_image(color_image, width, height)
labels = scale_image(labels, image.shape[1], image.shape[0], cv2.INTER_NEAREST)
K = shot.camera.get_K_in_pixel_coordinates(width, height)
R = shot.pose.get_rotation_matrix()
t = shot.pose.translation
dp.add_view(K, R, t, depth, plane, image, labels)
def compute_depth_range(tracks_manager, reconstruction, shot, config):
"""Compute min and max depth based on reconstruction points."""
depths = []
for track in tracks_manager.get_shot_observations(shot.id):
if track in reconstruction.points:
p = reconstruction.points[track].coordinates
z = shot.pose.transform(p)[2]
depths.append(z)
min_depth = np.percentile(depths, 10) * 0.9
max_depth = np.percentile(depths, 90) * 1.1
config_min_depth = config["depthmap_min_depth"]
config_max_depth = config["depthmap_max_depth"]
return config_min_depth or min_depth, config_max_depth or max_depth
def common_tracks_double_dict(
tracks_manager: pymap.TracksManager,
) -> t.Dict[str, t.Dict[str, t.List[str]]]:
"""List of track ids observed by each image pair.
Return a dict, ``res``, such that ``res[im1][im2]`` is the list of
common tracks between ``im1`` and ``im2``.
"""
common_tracks_per_pair = tracking.all_common_tracks_without_features(tracks_manager)
res = {image: {} for image in tracks_manager.get_shot_ids()}
for (im1, im2), v in common_tracks_per_pair.items():
res[im1][im2] = v
res[im2][im1] = v
return res
def find_neighboring_images(
shot: pymap.Shot,
common_tracks: t.Dict[str, t.Dict[str, t.List[str]]],
reconstruction: types.Reconstruction,
num_neighbors: int,
):
"""Find neighboring images based on common tracks."""
theta_min = np.pi / 60
theta_max = np.pi / 6
ns = []
C1 = shot.pose.get_origin()
for other_id, tracks in common_tracks.get(shot.id, {}).items():
if other_id not in reconstruction.shots:
continue
other = reconstruction.shots[other_id]
score = 0
C2 = other.pose.get_origin()
for track in tracks:
if track in reconstruction.points:
p = reconstruction.points[track].coordinates
theta = angle_between_points(p, C1, C2)
if theta > theta_min and theta < theta_max:
score += 1
if score > 20:
ns.append((other, score))
ns.sort(key=lambda ns: ns[1], reverse=True)
return [shot] + [n for n, s in ns[:num_neighbors]]
def angle_between_points(origin, p1, p2):
a0 = p1[0] - origin[0]
a1 = p1[1] - origin[1]
a2 = p1[2] - origin[2]
b0 = p2[0] - origin[0]
b1 = p2[1] - origin[1]
b2 = p2[2] - origin[2]
dot = a0 * b0 + a1 * b1 + a2 * b2
la = a0 * a0 + a1 * a1 + a2 * a2
lb = b0 * b0 + b1 * b1 + b2 * b2
return np.arccos(dot / np.sqrt(la * lb))
def distance_between_shots(shot, other):
o1 = shot.pose.get_origin()
o2 = other.pose.get_origin()
d = o2 - o1
return np.sqrt(np.sum(d ** 2))
def scale_image(
image: np.ndarray, width: int, height: int, interpolation: int
) -> np.ndarray:
return cv2.resize(image, (width, height), interpolation=interpolation)
def scale_down_image(
image: np.ndarray, width: int, height: int, interpolation=cv2.INTER_AREA
) -> np.ndarray:
width = min(width, image.shape[1])
height = min(height, image.shape[0])
return scale_image(image, width, height, interpolation)
def depthmap_to_ply(shot, depth, image):
"""Export depthmap points as a PLY string"""
height, width = depth.shape
K = shot.camera.get_K_in_pixel_coordinates(width, height)
R = shot.pose.get_rotation_matrix()
t = shot.pose.translation
y, x = np.mgrid[:height, :width]
v = np.vstack((x.ravel(), y.ravel(), np.ones(width * height)))
camera_coords = depth.reshape((1, -1)) * np.linalg.inv(K).dot(v)
points = R.T.dot(camera_coords - t.reshape(3, 1))
vertices = []
for p, c, d in zip(points.T, image.reshape(-1, 3), depth.reshape(-1, 1)):
if d != 0: # ignore points with zero depth
s = "{} {} {} {} {} {}".format(p[0], p[1], p[2], c[0], c[1], c[2])
vertices.append(s)
return io.points_to_ply_string(vertices)
def color_plane_normals(plane):
norm = np.linalg.norm(plane, axis=2)
normal = plane / norm[..., np.newaxis]
normal[..., 1] *= -1 # Reverse Y because it points down
normal[..., 2] *= -1 # Reverse Z because standard colormap does so
return ((normal + 1) * 128).astype(np.uint8)