in data/multiviewvideo.py [0:0]
def __init__(self,
        krtpath : str,
        geomdir : str,
        imagepath : str,
        keyfilter : list,
        camerafilter : Callable[[str], bool],
        segmentfilter : Callable[[str], bool]=lambda x: True,
        framelist : Optional[list]=None,
        frameexclude : Optional[list]=None,
        maxframes : int=-1,
        bgpath : Optional[str]=None,
        returnbg : bool=False,
        baseposesegframe : Optional[tuple]=None,
        baseposepath : Optional[str]=None,
        fixedcameras : Optional[list]=None,
        fixedframesegframe : Optional[tuple]=None,
        fixedcammean : float=0.,
        fixedcamstd : float=1.,
        fixedcamdownsample : int=4,
        standardizeverts : bool=True,
        standardizeavgtex : bool=True,
        standardizetex : bool=False,
        avgtexsize : int=1024,
        texsize : int=1024,
        subsampletype : Optional[str]=None,
        subsamplesize : int=0,
        downsample : float=1.,
        blacklevel : Optional[list]=None,
        maskbright : bool=False,
        maskbrightbg : bool=False
        ):
    """
    Dataset class for loading synchronized multi-view video (optionally
    with tracked mesh data).

    Parameters
    ----------
    krtpath : str,
        path to KRT file. See utils.utils.load_krt docstring for details
    geomdir : str,
        base path to geometry data (tracked meshes, unwrapped textures,
        rigid transforms)
    imagepath : str,
        path to images. should be a string that accepts "seg", "cam", and
        "frame" format keys (e.g., "data/{seg}/{cam}/{frame}.png")
    keyfilter : list,
        list of items to load and return (e.g., images, textures, vertices)
        available in this dataset:
        'fixedcamimage' -- image from a particular camera (unlike 'image',
            this image will always be from a specified camera)
        'fixedframeimage' -- image from a particular frame and camera
            (always the same)
        'verts' -- tensor of Kx3 vertices
        'tex' -- texture map as (3 x T_h x T_w) tensor
        'avgtex' -- texture map averaged across all cameras
        'modelmatrix' -- rigid transformation at frame
            (relative to 'base' pose)
        'camera' -- camera pose (intrinsic and extrinsic)
        'image' -- camera image as (3 x I_h x I_w) tensor
        'bg' -- background image as (3 x I_h x I_w) tensor
        'pixelcoords' -- pixel coordinates of image to evaluate
            (used to subsample training images to reduce memory usage)
    camerafilter : Callable[[str], bool],
        lambda function for selecting cameras to include in dataset
    segmentfilter : Callable[[str], bool]
        lambda function for selecting segments to include in dataset
        Segments are contiguous sets of frames.
    framelist : Optional[list[tuple[str, str]]],
        list of (segment, framenumber), used instead of segmentfilter
    frameexclude : Optional[list[str]],
        exclude these frames from being loaded (default: no exclusions)
    maxframes : int,
        maximum number of frames to load; if larger than the discovered
        frame list, the list is repeated (cyclically) to reach this size.
        -1 (default) means no limit.
    bgpath : Optional[str],
        path to background images if available
    returnbg : bool,
        True to return bg images in each batch, false to store them
        into self.bg
    baseposesegframe : Optional[tuple[str, str]],
        segment, frame of headpose to be used as the "base" head pose
        (used for modelmatrix)
    baseposepath : Optional[str],
        path to headpose to be used as the "base" head pose, used instead
        of baseposesegframe
    fixedcameras : Optional[list],
        list of cameras to be returned for 'fixedcamimage'
    fixedframesegframe : Optional[tuple[str, str]],
        segment and frame to be used for 'fixedframeimage'
    fixedcammean : float,
    fixedcamstd : float,
        norm stats for 'fixedcamimage' and 'fixedframeimage'
    standardizeverts : bool,
        remove mean/std from vertices
    standardizeavgtex : bool,
        remove mean/std from avgtex
    standardizetex : bool,
        remove mean/std from view-dependent texture
    avgtexsize : int,
        average texture map (averaged across viewpoints) dimension
    texsize : int,
        texture map dimension
    subsampletype : Optional[str],
        type of subsampling to do (determines how pixelcoords is generated)
        one of [None, "patch", "random", "random2", "stratified"]
    subsamplesize : int,
        dimension of subsampling
    downsample : float,
        downsample target image by factor
    blacklevel : Optional[list[float]],
        black level to subtract from camera images (default [0., 0., 0.])
    maskbright : bool,
        True to not include bright pixels in loss
    maskbrightbg : bool,
        True to not include bright background pixels in loss

    Raises
    ------
    Exception
        if neither baseposepath nor baseposesegframe is provided
    """
    # Normalize mutable defaults -- using None sentinels avoids the
    # shared-mutable-default-argument pitfall.
    frameexclude = [] if frameexclude is None else frameexclude
    fixedcameras = [] if fixedcameras is None else fixedcameras
    blacklevel = [0., 0., 0.] if blacklevel is None else blacklevel
    # options
    self.keyfilter = keyfilter
    self.fixedcameras = fixedcameras
    self.fixedframesegframe = fixedframesegframe
    self.fixedcammean = fixedcammean
    self.fixedcamstd = fixedcamstd
    self.fixedcamdownsample = fixedcamdownsample
    self.standardizeverts = standardizeverts
    self.standardizeavgtex = standardizeavgtex
    self.standardizetex = standardizetex
    self.subsampletype = subsampletype
    self.subsamplesize = subsamplesize
    self.downsample = downsample
    self.returnbg = returnbg
    self.blacklevel = blacklevel
    self.maskbright = maskbright
    self.maskbrightbg = maskbrightbg
    # compute camera/frame list
    krt = utils.load_krt(krtpath)
    self.allcameras = sorted(list(krt.keys()))
    self.cameras = list(filter(camerafilter, self.allcameras))
    # compute camera positions; intrinsics are scaled by the downsample
    # factor so they stay consistent with the downsampled images
    self.campos, self.camrot, self.focal, self.princpt, self.size = {}, {}, {}, {}, {}
    for cam in self.allcameras:
        # world-space camera center: -R^T t from the [R|t] extrinsic
        self.campos[cam] = (-np.dot(krt[cam]['extrin'][:3, :3].T, krt[cam]['extrin'][:3, 3])).astype(np.float32)
        self.camrot[cam] = (krt[cam]['extrin'][:3, :3]).astype(np.float32)
        self.focal[cam] = (np.diag(krt[cam]['intrin'][:2, :2]) / downsample).astype(np.float32)
        self.princpt[cam] = (krt[cam]['intrin'][:2, 2] / downsample).astype(np.float32)
        self.size[cam] = np.floor(krt[cam]['size'].astype(np.float32) / downsample).astype(np.int32)
    # set up paths
    # NOTE(review): the geometry path templates use "{frame:06d}", which
    # requires an integer frame value when formatted -- confirm callers
    # pass ints (frame entries read from frame_list.txt are strings).
    self.imagepath = imagepath
    if geomdir is not None:
        self.vertpath = os.path.join(geomdir, "tracked_mesh", "{seg}", "{frame:06d}.bin")
        self.transfpath = os.path.join(geomdir, "tracked_mesh", "{seg}", "{frame:06d}_transform.txt")
        self.texpath = os.path.join(geomdir, "unwrapped_uv_1024", "{seg}", "{cam}", "{frame:06d}.png")
    else:
        # define all path attributes so attribute access stays uniform
        self.vertpath = None
        self.transfpath = None
        self.texpath = None
    # build list of frames
    if framelist is None:
        # np.str was removed in NumPy 1.24; the builtin str is equivalent
        framelist = np.genfromtxt(os.path.join(geomdir, "frame_list.txt"), dtype=str)
        self.framelist = [tuple(sf) for sf in framelist if segmentfilter(sf[0]) and sf[1] not in frameexclude]
    else:
        self.framelist = framelist
    # truncate or extend frame list (maxframes == -1 means "no limit")
    if 0 <= maxframes <= len(self.framelist):
        self.framelist = self.framelist[:maxframes]
    elif maxframes > len(self.framelist) and len(self.framelist) > 0:
        # repeat the frame list cyclically until it has maxframes entries
        repeats = (maxframes + len(self.framelist) - 1) // len(self.framelist)
        self.framelist = (self.framelist * repeats)[:maxframes]
    # cartesian product with cameras
    self.framecamlist = [(x, cam)
            for x in self.framelist
            for cam in (self.cameras if len(self.cameras) > 0 else [None])]
    # set base pose (an explicit path takes precedence over seg/frame)
    if baseposepath is not None:
        self.basetransf = np.genfromtxt(baseposepath, max_rows=3).astype(np.float32)
    elif baseposesegframe is not None:
        self.basetransf = np.genfromtxt(self.transfpath.format(
            seg=baseposesegframe[0],
            frame=baseposesegframe[1])).astype(np.float32)
    else:
        raise Exception("base transformation must be provided")
    # load normstats
    if "avgtex" in keyfilter or "tex" in keyfilter:
        texmean = np.asarray(Image.open(os.path.join(geomdir, "tex_mean.png")), dtype=np.float32)
        self.texstd = float(np.genfromtxt(os.path.join(geomdir, "tex_var.txt")) ** 0.5)
        if "avgtex" in keyfilter:
            self.avgtexsize = avgtexsize
            avgtexmean = texmean
            if avgtexmean.shape[0] != self.avgtexsize:
                avgtexmean = cv2.resize(avgtexmean, dsize=(self.avgtexsize, self.avgtexsize), interpolation=cv2.INTER_LINEAR)
            self.avgtexmean = avgtexmean.transpose((2, 0, 1)).astype(np.float32).copy("C")
        if "tex" in keyfilter:
            self.texsize = texsize
            if texmean.shape[0] != self.texsize:
                # cv2.resize requires dsize as a (width, height) tuple;
                # a bare int here raises a TypeError
                texmean = cv2.resize(texmean, dsize=(self.texsize, self.texsize), interpolation=cv2.INTER_LINEAR)
            self.texmean = texmean.transpose((2, 0, 1)).astype(np.float32).copy("C")
    if "verts" in keyfilter:
        self.vertmean = np.fromfile(os.path.join(geomdir, "vert_mean.bin"), dtype=np.float32).reshape((-1, 3))
        self.vertstd = float(np.genfromtxt(os.path.join(geomdir, "vert_var.txt")) ** 0.5)
    # load background images in parallel; the context manager ensures the
    # worker pool is terminated instead of leaking its 40 processes
    if bgpath is not None:
        reader = ImageLoader(bgpath, blacklevel)
        with multiprocessing.Pool(40) as readpool:
            self.bg = {cam: (image, bgmask)
                    for cam, image, bgmask
                    in readpool.starmap(reader, zip(self.cameras, [self.size[x] for x in self.cameras]))
                    if image is not None}
    else:
        self.bg = {}