scripts/ui/verify_data.py (345 lines of code) (raw):
#!/usr/bin/env python3
# Copyright 2004-present Facebook. All Rights Reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Utility functions for verifying inputs for the UI.
The UI requires some files to be present on execution, such as:
- An uncalibrated rig
- Color images (either full-size or resized levels)
Functions for checking the presence of these (in the cases of both
AWS and local renders) are provided amongst other similar checks.
"""
import glob
import json
import os
import posixpath
import sys
import tarfile
dir_scripts = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
dir_root = os.path.dirname(dir_scripts)
sys.path.append(dir_root)
sys.path.append(os.path.join(dir_scripts, "render"))
sys.path.append(os.path.join(dir_scripts, "util"))
import scripts.render.glog_check as glog
from scripts.render.resize import resize_frames
from scripts.util.system_util import list_only_visible_files, merge_lists
# Module-wide verbosity flag read by log() and the S3 helpers. It is overwritten by
# verify() and update_frame_names(); defaulting to quiet keeps the other helpers
# callable before either of those has run (a bare module-level `global` statement
# defines nothing, so the name would otherwise be missing).
verbose = False
def log(msg):
    """Prints logs if in verbose mode.

    Args:
        msg (str): Message to be displayed.
    """
    # The module-level flag is only assigned by verify() / update_frame_names().
    # Treat a flag that has not been assigned yet as "not verbose" instead of
    # raising NameError.
    if globals().get("verbose", False):
        print(msg)
def get_stem(p):
    """Strips the directory part and the final extension from a path.

    Args:
        p (str): File path.

    Returns:
        str: Last path component with its trailing extension removed.
    """
    _, filename = os.path.split(p)
    stem, _ = os.path.splitext(filename)
    return stem
def check_image_existence(image_dir, sample_frame_name, recursive=True):
    """Looks for an image file under a directory and reports its extension.

    Args:
        image_dir (str): Path to the local directory.
        sample_frame_name (str): Frame name (file stem) to look for. If None is
            passed in, any file with a recognized image extension matches.
        recursive (bool, optional): Whether or not to check through all contained dirs.

    Returns:
        str: Extension (with its leading dot) of the first matching file, or an
            empty string if nothing matched.
    """
    image_exts = {".bin", ".exr", ".jpg", ".jpeg", ".pfm", ".png", ".tif", ".tiff"}
    for candidate in glob.iglob(f"{image_dir}/**", recursive=recursive):
        ext = os.path.splitext(candidate)[1]
        if ext not in image_exts:
            continue
        stem = get_stem(candidate)
        if sample_frame_name is None or stem == sample_frame_name:
            return ext  # first hit wins
    return ""
def get_rigs(parent):
    """Lists the rig files present in the project's rigs directory.

    Args:
        parent: class instance

    Returns:
        list[str]: Paths matching *.json directly inside parent.path_rigs (empty
            if there are none).
    """
    log(glog.green(f"Looking for rigs in {parent.path_rigs}..."))
    # With recursive=False the "**" wildcard behaves like "*", so only the top level
    # of the rigs directory is scanned
    rig_fns = glob.glob(f"{parent.path_rigs}/**.json", recursive=False)
    if not rig_fns:
        log(glog.yellow(f"No rigs found in {parent.path_rigs}"))
    return rig_fns
def get_rigs_s3(parent):
    """Fetches the project's rigs from S3 and lists what ended up locally.

    Args:
        parent: class instance

    Returns:
        list[str]: Local rig filenames found by get_rigs after the S3 copy.
    """
    s3_rigs_dir = posixpath.join(parent.project_root, "rigs")
    log(glog.green(f"Downloading rigs from {s3_rigs_dir}..."))
    src = f"{s3_rigs_dir}/"
    dst = f"{parent.path_rigs}/"
    parent.aws_util.s3_cp(src, dst, run_silently=not verbose)

    # Anything that was in S3 should now be in the local rigs directory
    return get_rigs(parent)
def have_data(parent, dirs, is_levels=False):
    """Reports, per directory, whether at least one valid image is present.

    Args:
        parent: class instance
        dirs (list[str]): Directories to be verified.
        is_levels (bool, optional): Whether or not the levels are being used
            instead of full-size images (usually only relevant in AWS renders).

    Returns:
        dict[str, bool]: Map of each entry of dirs (as passed in, without the
            level_0 suffix) to whether or not images were found for it.
    """
    found = {}
    for dir_key in dirs:
        # We need level 0 for thresholds
        dir_images = posixpath.join(dir_key, "level_0") if is_levels else dir_key
        log(glog.green(f"Looking for valid images in {dir_images}..."))

        # Only the video color directories are matched against the sample frame;
        # everywhere else any image counts
        sample_frame = None
        if parent.s3_sample_frame and (
            dir_images == parent.path_video_color
            or dir_images.startswith(parent.path_video_color_levels)
        ):
            sample_frame = parent.s3_sample_frame

        has_images = check_image_existence(dir_images, sample_frame) != ""
        found[dir_key] = has_images
        if not has_images:
            log(glog.yellow(f"No valid images found in {dir_images}"))
    return found
def have_color(parent, is_levels=False):
    """Runs have_data over the project's color directories.

    Args:
        parent: class instance
        is_levels (bool, optional): Whether to check the level directories
            (parent.paths_color_levels, True) or the full-size ones
            (parent.paths_color, False).

    Returns:
        dict[str, bool]: Map of color directories and whether or not it is non-empty.
    """
    if is_levels:
        color_dirs = parent.paths_color_levels
    else:
        color_dirs = parent.paths_color
    return have_data(parent, color_dirs, is_levels)
def have_disparity(parent, is_levels=False):
    """Runs have_data over the project's disparity directories.

    Args:
        parent: class instance
        is_levels (bool, optional): Whether to check the level directories
            (parent.paths_disparity_levels, True) or the full-size ones
            (parent.paths_disparity, False).

    Returns:
        dict[str, bool]: Map of disparity directory and whether or not it is non-empty.
    """
    if is_levels:
        disparity_dirs = parent.paths_disparity_levels
    else:
        disparity_dirs = parent.paths_disparity
    return have_data(parent, disparity_dirs, is_levels)
def unpack_tars(parent, dir_local, recursive=True):
    """Extracts the tar archives found under a directory and deletes them.

    Each archive is extracted into the directory that contains it and is removed
    only after extraction has completed.

    Args:
        parent: class instance (not used by this function).
        dir_local (str): Local directory to scan for *.tar files.
        recursive (bool, optional): If True, archives are looked up in the immediate
            subdirectories of dir_local (<dir_local>/*/*.tar); otherwise in dir_local
            itself (<dir_local>/*.tar).
    """
    # glob is deliberately non-recursive: "**" then behaves like "*", which yields
    # the two patterns described above
    needle = "*/" if recursive else "*"
    tar_files = glob.glob(f"{dir_local}/*{needle}*.tar", recursive=False)
    for tar_file in tar_files:
        log(glog.green(f"Unpacking {tar_file}..."))
        # The context manager closes the archive even if extraction fails
        with tarfile.open(tar_file) as t:
            t.extractall(os.path.dirname(tar_file))
        os.remove(tar_file)
def download_frame_s3(parent, dir_s3, dir_local, frames_s3_names, is_levels=False):
    """Downloads the tar archive(s) of a single frame from S3 and unpacks them.

    The frame is parent.s3_sample_frame when that is set, otherwise the first entry
    of frames_s3_names. Nothing is downloaded when frames_s3_names is empty or when
    the requested sample frame is not part of it.

    Args:
        parent: class instance
        dir_s3 (str): S3 directory containing the frame archives (or, for levels,
            the per-level subdirectories that contain them).
        dir_local (str): Local directory the archives are copied to and unpacked in.
        frames_s3_names (list[str]): Names of the frames available in S3.
        is_levels (bool, optional): Whether to fetch the frame from every level
            listed under dir_s3 (True) or the single full-size archive (False).
    """
    if not frames_s3_names:
        # Guard against indexing an empty listing (used to raise IndexError)
        log(glog.yellow(f"No frames to download from {dir_s3}"))
        return

    s3_sample_frame = parent.s3_sample_frame
    if s3_sample_frame:
        if s3_sample_frame not in frames_s3_names:
            log(glog.yellow(f"Cannot find {s3_sample_frame} in {frames_s3_names}"))
            return
        frame_s3_name = s3_sample_frame
    else:
        frame_s3_name = frames_s3_names[0]

    fn = f"{frame_s3_name}.tar"
    if is_levels:
        recursive = True
        levels = parent.aws_util.s3_ls(dir_s3, run_silently=not verbose)
        levels = list(filter(None, levels))  # removes empty results from ls
        t = "levels"
        srcs = [posixpath.join(dir_s3, level, fn) for level in levels]
        dsts = [posixpath.join(dir_local, level, fn) for level in levels]
    else:
        recursive = False
        t = "full-size"
        srcs = [posixpath.join(dir_s3, fn)]
        dsts = [posixpath.join(dir_local, fn)]

    exclude = None
    include = None
    # Printed unconditionally (not through log), so it shows even when not verbose
    print(glog.green(f"Downloading {fn} {t} from {dir_s3}..."))
    for src, dst in zip(srcs, dsts):
        parent.aws_util.s3_cp(
            src, dst, exclude, include, recursive=False, run_silently=not verbose
        )
    unpack_tars(parent, dir_local, recursive)
def get_data_s3(parent, have_data_in, is_levels=False, is_disp=False):
    """Downloads a sample frame from S3 for every directory that has no local data.

    Args:
        parent: class instance
        have_data_in dict[str, bool]: Map of directories and whether or not they
            are non-empty.
        is_levels (bool, optional): Whether or not to use full-size (False) or
            level_0 images (True).
        is_disp (bool, optional): Whether the final local check is run on the
            disparity directories (True) or on the color directories (False).

    Returns:
        dict[str, bool]: Map of local data directory and whether or not it is non-empty.

    Raises:
        Exception: If attempting to get data from S3 on a non-AWS render.
    """
    if not parent.is_aws:
        raise Exception(f"Not an S3 project: {parent.project_root}")

    s3_root = parent.project_root
    # Directories that already have local data are left alone
    missing_dirs = [d for d, has_data in have_data_in.items() if not has_data]
    for dir_local in missing_dirs:
        dir_s3 = dir_local.replace(parent.path_project, s3_root)
        if is_levels:
            dir_s3_frames = posixpath.join(dir_s3, "level_0")
        else:
            dir_s3_frames = dir_s3
        frames_s3_names = get_s3_frame_names(parent, dir_s3_frames)
        if frames_s3_names:
            download_frame_s3(parent, dir_s3, dir_local, frames_s3_names, is_levels)

    # If there are frames in S3 one of them should now be downloaded to local directory
    if is_disp:
        return have_disparity(parent, is_levels)
    return have_color(parent, is_levels)
def resize_local_frame(parent, dir_full, dir_level, rig_ref):
    """Runs resize_frames on a single frame of a full-size image directory.

    The frame is parent.s3_sample_frame when that is set and dir_full is the video
    color directory; otherwise it is the first (sorted) file found for the first
    camera.

    Args:
        parent: class instance
        dir_full (str): Directory with the full-size images (one subdirectory per
            camera).
        dir_level (str): Directory passed to resize_frames as the destination.
        rig_ref (str): Path to the rig JSON file handed to resize_frames.
    """
    glog.check(
        len(parent.cameras) > 0,
        f"No cameras found. Cannot resize local frame {dir_full}",
    )
    dir_cam = posixpath.join(dir_full, parent.cameras[0])
    frames = list_only_visible_files(dir_cam)
    glog.check_gt(len(frames), 0, f"No frames found in {dir_cam}")

    use_sample_frame = parent.s3_sample_frame and dir_full == parent.path_video_color
    if use_sample_frame:
        frame_name = parent.s3_sample_frame
    else:
        first_frame_fn = sorted(frames)[0]
        frame_name = os.path.splitext(first_frame_fn)[0]
    frame_num = int(frame_name)

    log(glog.green(f"Resizing full-size frame {frame_name} in {dir_full}..."))
    with open(rig_ref, "r") as f:
        rig = json.load(f)
    # First and last frame are the same: only this one frame is processed
    resize_frames(dir_full, dir_level, rig, frame_num, frame_num)
def get_cameras(parent, rig_fn):
    """Determines the camera names of the captured project.

    Args:
        parent: class instance
        rig_fn (str): Path to the rig. If no path is provided, cameras are determined
            by the directory structure.

    Returns:
        list[str]: Names of the cameras: the "id" of every camera in the rig, or the
            visible entries of the first non-empty color directory. Empty if neither
            source yields anything.
    """
    if rig_fn:
        with open(rig_fn, "r") as f:
            rig_cameras = json.load(f)["cameras"]
        return [entry["id"] for entry in rig_cameras]

    # No rig: fall back to the first color directory that has any content
    for color_dir in parent.paths_color + parent.paths_color_levels:
        if color_dir in parent.paths_color_levels:
            color_dir = posixpath.join(color_dir, "level_0")
        found = list_only_visible_files(color_dir)
        if len(found) > 0:
            return found
    return []
def get_rig_width(parent, rig_fn):
    """Reads the image width of the first camera listed in a rig file.

    Args:
        parent: class instance
        rig_fn (str): Path to the rig

    Returns:
        int: First entry of the first camera's "resolution", cast to int.
    """
    with open(rig_fn, "r") as f:
        cameras = json.load(f)["cameras"]
    width = cameras[0]["resolution"][0]
    return int(width)
def is_frame(name):
    """Whether or not the name is a valid (expected) frame name.

    Args:
        name (str): Frame name to be tested. May include a directory and an
            extension; only the file stem is examined.

    Returns:
        bool: Whether or not this is valid (i.e. if it can be cast as an int).
    """
    try:
        # Same stem computation as get_stem(): drop the directory and the extension
        stem = os.path.splitext(os.path.basename(name))[0]
        # Verify that the name corresponds to a number
        int(stem)
    except (TypeError, ValueError):
        # TypeError: name is not path-like; ValueError: the stem is not an integer.
        # Anything else is a genuine error and is no longer silently swallowed.
        return False
    return True
def get_local_frame_names(dir):
    """Finds all the frames in a directory.

    Args:
        dir (str): Path to a local directory.

    Returns:
        list[str]: Sorted list of frame names in the directory. If an invalid directory
            is passed in, an empty result is returned.
    """
    if not os.path.isdir(dir):
        return []
    log(glog.green(f"Looking for local frames in {dir}"))
    frames = list_only_visible_files(dir)
    # Sort so the result matches the documented contract and get_s3_frame_names()
    return sorted(get_stem(f) for f in frames if is_frame(f))
def get_s3_frame_names(parent, dir):
    """Lists the frames stored as tar archives in an S3 directory.

    Args:
        parent: class instance
        dir (str): Path to the S3 directory being scanned. A path that does not
            start with "s3://" has parent.path_project replaced by
            parent.project_root first.

    Returns:
        list[str]: Sorted list of frame names in the directory.
    """
    if dir.startswith("s3://"):
        dir_s3 = dir
    else:
        dir_s3 = dir.replace(parent.path_project, parent.project_root)
    log(glog.green(f"Looking for S3 frames in {dir_s3}"))

    listing = parent.aws_util.s3_ls(dir_s3, run_silently=not verbose)
    # Keep only *.tar entries whose stem is a frame number
    names = [
        get_stem(entry)
        for entry in listing
        if entry.endswith(".tar") and is_frame(entry)
    ]
    names.sort()
    return names
def get_frame_names(parent, dir, is_cache=True):
    """Finds all the frames in a directory, looked up locally or in S3.

    S3 is only queried for AWS projects when is_cache is False; in every other
    case the local directory is scanned.

    Args:
        parent: class instance
        dir (str): Path to the directory being scanned.
        is_cache (bool): Whether or not to check the cache for getting frame names.

    Returns:
        list[str]: Sorted list of frame names in the directory.
    """
    query_s3 = parent.is_aws and not is_cache
    if query_s3:
        frame_names = get_s3_frame_names(parent, dir)
    else:
        frame_names = get_local_frame_names(dir)
    return sorted(filter(is_frame, frame_names))
def print_frame_range(parent, suffix):
    """Displays frame range.

    Logs the first and last entries of parent.frames_<suffix> (or the single entry
    if there is only one). Nothing is logged when the attribute is missing, None
    or empty.

    Args:
        parent: class instance
        suffix (str): Suffix of the frames_<suffix> attribute to display; also used
            as the label in the logged message.
    """
    ff = getattr(parent, f"frames_{suffix}", None)
    if not ff:
        # Covers the empty list as well, so no separate zero-length case is needed
        return
    if len(ff) == 1:
        frame_range = f"{ff[0]}"
    else:
        frame_range = f"{ff[0]}, {ff[-1]}"
    log(glog.green(f"Frames ({suffix}): [{frame_range}]"))
def download_s3_disparities(parent):
    """Fetches disparities from S3 for every disparity directory lacking local data.

    The full-size directories are handled first, then the level directories.

    Args:
        parent: class instance
    """
    for use_levels in (False, True):
        have_disp = have_disparity(parent, is_levels=use_levels)
        if all(have_disp.values()):
            continue  # everything is already available locally
        get_data_s3(parent, have_disp, is_levels=use_levels)
def update_frame_names(
    parent, data_types=None, image_types=None, update_local=True, update_s3=True
):
    """Refreshes the frames_* attributes of parent for the given data types.

    For every (data type, image type) pair the frame names are stored in
    parent.frames_<suffix> (local) and parent.frames_<suffix>_s3 (S3, AWS projects
    only), where <suffix> is "<data type>_<image type>", or just "bin" for the bin
    image type.

    Args:
        parent: class instance
        data_types (list[str]): List of data types. Defaults to ["bg", "video"].
        image_types (list[str]): List of image types. Defaults to color, color_levels,
            disparity, disparity_levels and bin.
        update_local (bool, optional): Whether or not to refresh the local frames.
        update_s3 (bool, optional): Whether or not to refresh the S3 frames.
    """
    global verbose
    verbose = parent.verbose

    log(glog.green("Getting frame names..."))
    glog.check(len(parent.cameras) > 0, "No cameras found!")
    camera_ref = parent.cameras[0]

    data_types = data_types or ["bg", "video"]
    image_types = image_types or [
        "color",
        "color_levels",
        "disparity",
        "disparity_levels",
        "bin",
    ]

    for data_type in data_types:
        for image_type in image_types:
            is_bin = image_type == "bin"
            if is_bin and data_type == "bg":
                continue
            suffix = image_type if is_bin else f"{data_type}_{image_type}"
            frames_attr = f"frames_{suffix}"

            p = getattr(parent, f"path_{suffix}", None)
            if "_levels" in image_type:
                p = posixpath.join(p, "level_0")

            if update_local:
                # Local frames are looked up in the first camera's subdirectory
                p_local = posixpath.join(p, camera_ref)
                local_frames = get_frame_names(parent, p_local, is_cache=True)
                setattr(parent, frames_attr, local_frames)
                print_frame_range(parent, suffix)

            if update_s3 and parent.is_aws:
                # Cached frames are eventually synced to S3, so any frame in the
                # cache should be added to the S3 frames
                frames_s3 = get_frame_names(parent, p, is_cache=False)
                frames_cache = getattr(parent, frames_attr, None)
                merged = sorted(merge_lists(frames_s3, frames_cache))
                setattr(parent, f"{frames_attr}_s3", merged)
                print_frame_range(parent, f"{suffix}_s3")
def verify(parent, save_frame_ranges=True):
    """Performs all validation on data. Warnings are displayed if an unexpected structure
    is encountered.

    In order: finds a rig (locally, then in S3 for AWS projects), records the cameras
    and rig width on parent, checks full-size and level color images (fetching from
    S3 for AWS projects), creates missing level images from full-size ones, fetches
    disparities from S3 for AWS projects and finally refreshes the frame names.

    Args:
        parent: class instance
        save_frame_ranges (bool, optional): Whether or not to call
            update_frame_names on parent once validation is done.
    """
    # Sets the module-wide verbosity used by log() and the S3 helpers
    global verbose
    verbose = parent.verbose
    if not verbose:
        # Detailed logs are suppressed, so print a single notice instead
        print(glog.green("\nVerifying data (may take a few seconds)..."))
    # Look for a rig
    rig_fns = get_rigs(parent)
    if not rig_fns and parent.is_aws:  # no local rigs, check S3
        rig_fns = get_rigs_s3(parent)
    # NOTE(review): glog.check presumably aborts when the condition is false; the
    # indexing below relies on that - confirm against glog_check
    glog.check(len(rig_fns) > 0, "Cannot launch UI without any rig")
    # The first rig found is used as the reference for cameras and width
    rig_ref = rig_fns[0]
    parent.cameras = get_cameras(parent, rig_ref)
    parent.rig_width = get_rig_width(parent, rig_ref)
    # We need full-size images if we want to (re-)calibrate
    have_full_color = have_color(parent)
    if not all(have_full_color.values()) and parent.is_aws:  # no local color, check S3
        if parent.s3_ignore_fullsize_color:
            log(glog.yellow(f"Ignoring full-size color image downloads from S3..."))
        else:
            have_full_color = get_data_s3(parent, have_full_color)
    # We have a rig, but we need color levels to run thresholds for depth
    # estimation
    have_level_color = have_color(parent, is_levels=True)
    if not all(have_level_color.values()) and parent.is_aws:  # no local color, check S3
        have_level_color = get_data_s3(parent, have_level_color, is_levels=True)
    # Check what color types have full-size but not level color
    # Pairs each level directory with the full-size directory at the same position;
    # this relies on both dicts iterating their keys in matching order
    map_level_full = dict(zip(have_level_color, have_full_color))
    for dir_level, has_level_color in have_level_color.items():
        if not has_level_color:
            log(glog.yellow(f"No level colors in {dir_level}"))
            dir_full = map_level_full[dir_level]
            if not have_full_color[dir_full]:
                log(
                    glog.yellow(
                        f"No full-size colors in {dir_full}. Cannot create levels"
                    )
                )
                continue
            else:
                # Full-size images exist: create the level images from one of them
                resize_local_frame(parent, dir_full, dir_level, rig_ref)
    # Re-check the levels, since the loop above may have created some
    have_level_color = have_color(parent, is_levels=True)
    if not have_level_color[parent.path_bg_color_levels]:
        log(glog.yellow(f"No background frames found. Cannot render background"))
    if not have_level_color[parent.path_video_color_levels]:
        log(glog.yellow(f"No video frames found. Cannot render video"))
    if not any(have_level_color.values()) and not any(have_full_color.values()):
        # Neither level nor full-size color images are available anywhere
        glog.check(False, f"No colors. Cannot calibrate")
    # Download disparities from S3
    if parent.is_aws:
        download_s3_disparities(parent)
    # Get frames for color, color levels, disparity (background and video)
    if save_frame_ranges:
        update_frame_names(parent)
def make_path_dirs(parent):
    """Creates the directory behind every path_* attribute of parent.

    Attributes that are empty/None, or whose value has a file extension, are
    skipped.

    Args:
        parent: class instance
    """
    for attr in dir(parent):
        if not attr.startswith("path_"):
            continue
        p = getattr(parent, attr)
        if not p:
            continue
        _, ext = os.path.splitext(p)
        if ext.lower():
            continue  # ignore paths that look like files
        os.makedirs(p, exist_ok=True)
def set_default_top_level_paths(parent, mkdirs=False):
    """Populates parent with the default path attributes of a project.

    The project root is parent.path_project when parent has that attribute,
    otherwise parent.parent.path_project. All paths are joined with posixpath.

    Args:
        parent: class instance
        mkdirs (bool, optional): Whether or not to make the defined directories.
    """
    if "path_project" in dir(parent):
        project = parent.path_project
    else:
        project = parent.parent.path_project

    # Directories sitting directly under the project root
    top_level_names = (
        "aws",
        "background",
        "calibration",
        "flags",
        "ipc",
        "logs",
        "rigs",
        "video",
    )
    for name in top_level_names:
        setattr(parent, f"path_{name}", posixpath.join(project, name))

    # Image directories, once for the background and once for the video
    for kind in ("color", "color_levels", "disparity", "disparity_levels"):
        setattr(parent, f"path_bg_{kind}", posixpath.join(parent.path_background, kind))
        setattr(parent, f"path_video_{kind}", posixpath.join(parent.path_video, kind))

    video_dir = parent.path_video
    parent.path_fg_masks = posixpath.join(video_dir, "foreground_masks")
    parent.path_fg_masks_levels = posixpath.join(video_dir, "foreground_masks_levels")
    for name in ("bin", "export", "fused"):
        setattr(parent, f"path_{name}", posixpath.join(parent.path_video, name))

    aws_dir = parent.path_aws
    parent.path_aws_key_fn = posixpath.join(aws_dir, "key.pem")
    parent.path_aws_credentials = posixpath.join(aws_dir, "credentials.csv")
    parent.path_aws_ip_file = posixpath.join(aws_dir, "ec2_info.txt")

    # So we know where to look for frames
    parent.paths_color = [parent.path_bg_color, parent.path_video_color]
    parent.paths_color_levels = [
        parent.path_bg_color_levels,
        parent.path_video_color_levels,
    ]
    # paths_first and paths_last share the very same list as paths_color_levels
    parent.paths_first = parent.paths_color_levels
    parent.paths_last = parent.paths_first
    parent.paths_disparity = [parent.path_bg_disparity, parent.path_video_disparity]
    parent.paths_disparity_levels = [
        parent.path_bg_disparity_levels,
        parent.path_video_disparity_levels,
    ]

    parent.path_frame_bg = parent.path_bg_color_levels
    parent.path_frame_fg = parent.path_video_color_levels
    parent.path_first = parent.path_frame_fg
    parent.path_last = parent.path_frame_fg

    parent.is_farm = project.startswith("s3://")

    if "flagfile_basename" in dir(parent) and parent.flagfile_basename:
        parent.flagfile_fn = posixpath.join(parent.path_flags, parent.flagfile_basename)

    parent.output_dirs = []  # should be populated by every tab
    parent.overwrite_output = False

    if mkdirs:
        make_path_dirs(parent)