in data/multiviewvideo.py [0:0]
def __getitem__(self, idx):
    (seg, frame), cam = self.framecamlist[idx]

    result = {}
    result["segid"] = seg
    result["frameid"] = frame
    if cam is not None:
        result["cameraid"] = cam

    validinput = True
    # image from one or more cameras (those cameras are fixed over the dataset)
    if "fixedcamimage" in self.keyfilter:
        ninput = len(self.fixedcameras)

        fixedcamimage = []
        for i in range(ninput):
            imagepath = self.imagepath.format(seg=seg, cam=self.fixedcameras[i], frame=int(frame))
            image = utils.downsample(
                np.asarray(Image.open(imagepath), dtype=np.uint8),
                self.fixedcamdownsample).transpose((2, 0, 1)).astype(np.float32)
            fixedcamimage.append(image)
        # stack the views along the height axis -> (3, ninput * H, W), then standardize
        fixedcamimage = np.concatenate(fixedcamimage, axis=1)
        fixedcamimage[:] -= self.fixedcammean
        fixedcamimage[:] /= self.fixedcamstd
        result["fixedcamimage"] = fixedcamimage
    # image from the same fixed cameras, but always from one fixed segment/frame
    if "fixedframeimage" in self.keyfilter:
        ninput = len(self.fixedcameras)

        fixedframeimage = []
        for i in range(ninput):
            imagepath = self.imagepath.format(
                seg=self.fixedframesegframe[0],
                cam=self.fixedcameras[i],
                frame=int(self.fixedframesegframe[1]))
            image = utils.downsample(
                np.asarray(Image.open(imagepath), dtype=np.uint8),
                self.fixedcamdownsample).transpose((2, 0, 1)).astype(np.float32)
            fixedframeimage.append(image)
        fixedframeimage = np.concatenate(fixedframeimage, axis=1)
        fixedframeimage[:] -= self.fixedcammean
        fixedframeimage[:] /= self.fixedcamstd
        result["fixedframeimage"] = fixedframeimage
    # tracked mesh vertices for this frame (and, for "verts_next", the following frame)
    for k in ["verts", "verts_next"]:
        if k in self.keyfilter:
            vertpath = self.vertpath.format(seg=seg, frame=int(frame) + (1 if k == "verts_next" else 0))
            verts = np.fromfile(vertpath, dtype=np.float32)
            if self.standardizeverts:
                verts -= self.vertmean.ravel()
                verts /= self.vertstd
            result[k] = verts.reshape((-1, 3))
    # texture averaged over all cameras for a single frame
    for k in ["avgtex", "avgtex_next"]:
        if k in self.keyfilter:
            texpath = self.texpath.format(seg=seg, cam="average", frame=int(frame) + (1 if k == "avgtex_next" else 0))
            try:
                tex = np.asarray(Image.open(texpath), dtype=np.uint8)
                if tex.shape[0] != self.avgtexsize:
                    tex = cv2.resize(tex, dsize=(self.avgtexsize, self.avgtexsize), interpolation=cv2.INTER_LINEAR)
                tex = tex.transpose((2, 0, 1)).astype(np.float32)
            except Exception:
                # missing or unreadable texture; substitute zeros and flag the sample
                tex = np.zeros((3, self.avgtexsize, self.avgtexsize), dtype=np.float32)
                validinput = False
            if np.sum(tex) == 0:
                validinput = False
            # texels not covered by any camera are zero in all channels
            texmask = np.sum(tex, axis=0) != 0
            if self.standardizeavgtex:
                tex -= self.avgtexmean
                tex /= self.texstd
                tex[:, ~texmask] = 0.
            result[k] = tex

    # keep track of whether we failed to load any of the inputs
    result["validinput"] = np.float32(1.0 if validinput else 0.0)
if "modelmatrix" in self.keyfilter or "modelmatrixinv" in self.keyfilter or "camera" in self.keyfilter:
def to4x4(m):
return np.r_[m, np.array([[0., 0., 0., 1.]], dtype=np.float32)]
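        # to4x4 promotes a 3x4 [R|t] matrix to homogeneous form, e.g.:
        #   [[r00, r01, r02, tx],       [[r00, r01, r02, tx],
        #    [r10, r11, r12, ty],   ->   [r10, r11, r12, ty],
        #    [r20, r21, r22, tz]]        [r20, r21, r22, tz],
        #                                [0.,  0.,  0.,  1.]]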
        # per-frame rigid transformation of the scene/object
        for k in ["modelmatrix", "modelmatrix_next"]:
            if k in self.keyfilter:
                if self.transfpath is not None:
                    transfpath = self.transfpath.format(seg=seg, frame=int(frame) + (1 if k == "modelmatrix_next" else 0))
                    try:
                        frametransf = np.genfromtxt(transfpath).astype(np.float32)
                    except Exception:
                        frametransf = None
                    if frametransf is None:
                        # missing per-frame transform; fall back to the identity
                        result[k] = np.eye(4, dtype=np.float32)
                    else:
                        result[k] = to4x4(np.dot(
                            np.linalg.inv(to4x4(frametransf)),
                            to4x4(self.basetransf))[:3, :4])
                else:
                    # no transform files at all; identity (4x4 to match the branch above)
                    result[k] = np.eye(4, dtype=np.float32)
        # inverse of the per-frame rigid transformation of the scene/object
        for k in ["modelmatrixinv", "modelmatrixinv_next"]:
            if k in self.keyfilter:
                if self.transfpath is not None:
                    transfpath = self.transfpath.format(seg=seg, frame=int(frame) + (1 if k == "modelmatrixinv_next" else 0))
                    try:
                        frametransf = np.genfromtxt(transfpath).astype(np.float32)
                    except Exception:
                        frametransf = None
                    if frametransf is None:
                        result[k] = np.eye(4, dtype=np.float32)
                    else:
                        result[k] = to4x4(np.dot(
                            np.linalg.inv(to4x4(self.basetransf)),
                            to4x4(frametransf))[:3, :4])
                else:
                    result[k] = np.eye(4, dtype=np.float32)
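        # NOTE: modelmatrix is inv(frametransf) @ basetransf and modelmatrixinv
        # is inv(basetransf) @ frametransf, so for rigid transforms the two are
        # exact inverses of each other: they re-express the tracked per-frame
        # pose relative to the base alignment of the sequence.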
    # camera-specific data
    if cam is not None:
        # camera pose, mapped through the inverse of basetransf so it lives
        # in the same base-aligned coordinate frame as the geometry
        if "camera" in self.keyfilter:
            result["campos"] = np.dot(self.basetransf[:3, :3].T, self.campos[cam] - self.basetransf[:3, 3])
            result["camrot"] = np.dot(self.basetransf[:3, :3].T, self.camrot[cam].T).T
            result["focal"] = self.focal[cam]
            result["princpt"] = self.princpt[cam]
            result["camindex"] = self.allcameras.index(cam)
        # per-frame / per-camera unwrapped texture map
        if "tex" in self.keyfilter:
            texpath = self.texpath.format(seg=seg, cam=cam, frame=frame)
            try:
                tex = np.asarray(Image.open(texpath), dtype=np.uint8).transpose((2, 0, 1)).astype(np.float32)
            except Exception:
                tex = np.zeros((3, self.texsize, self.texsize), dtype=np.float32)
            assert tex.shape[1] == self.texsize
            texmask = np.sum(tex, axis=0) != 0
            if self.standardizetex:
                tex -= self.texmean
                tex /= self.texstd
                tex[:, ~texmask] = 0.
            result["tex"] = tex
            result["texmask"] = texmask
        # camera images
        if "image" in self.keyfilter:
            # target image
            imagepath = self.imagepath.format(seg=seg, cam=cam, frame=int(frame))
            image = utils.downsample(
                np.asarray(Image.open(imagepath), dtype=np.uint8),
                self.downsample).transpose((2, 0, 1)).astype(np.float32)
            height, width = image.shape[1:3]

            # an all-black image indicates a dropped or invalid frame
            valid = np.float32(1.0) if np.sum(image) != 0 else np.float32(0.)

            # remove black level
            result["image"] = np.clip(image - np.array(self.blacklevel, dtype=np.float32)[:, None, None], 0., None)
            result["imagevalid"] = valid

            # optionally mask pixels with bright background values
            if self.maskbrightbg and cam in self.bg:
                result["imagemask"] = self.bg[cam][1]

            # optionally mask pixels with bright (near-saturated) values
            if self.maskbright:
                brightmask = np.where(
                    (image[0] > 245.) |
                    (image[1] > 245.) |
                    (image[2] > 245.), 0., 1.).astype(np.float32)[None, :, :]
                if "imagemask" in result:
                    # combine out-of-place so the cached background mask is not mutated
                    result["imagemask"] = result["imagemask"] * brightmask
                else:
                    result["imagemask"] = brightmask

        # background image
        if "bg" in self.keyfilter and self.returnbg:
            result["bg"] = self.bg[cam][0]
        # image pixel coordinates to supervise (optionally subsampled)
        if "pixelcoords" in self.keyfilter:
            if self.subsampletype == "patch":
                # a random axis-aligned square patch of the image
                indx = torch.randint(0, width - self.subsamplesize + 1, size=(1,)).item()
                indy = torch.randint(0, height - self.subsamplesize + 1, size=(1,)).item()

                py, px = torch.meshgrid(
                    torch.arange(indy, indy + self.subsamplesize).float(),
                    torch.arange(indx, indx + self.subsamplesize).float())
            elif self.subsampletype == "random":
                # uniform random integer pixel positions
                px = torch.randint(0, width, size=(self.subsamplesize, self.subsamplesize)).float()
                py = torch.randint(0, height, size=(self.subsamplesize, self.subsamplesize)).float()
            elif self.subsampletype == "random2":
                # uniform random continuous pixel positions
                px = torch.rand(size=(self.subsamplesize, self.subsamplesize)) * (width - 1)
                py = torch.rand(size=(self.subsamplesize, self.subsamplesize)) * (height - 1)
            elif self.subsampletype == "stratified":
                # one jittered sample per cell of a subsamplesize x subsamplesize grid
                ssy = self.subsamplesize
                ssx = self.subsamplesize
                bsizex = (width - 1.) / ssx
                bsizey = (height - 1.) / ssy
                px = (torch.arange(ssx)[None, :] + torch.rand(size=(ssy, ssx))) * bsizex
                py = (torch.arange(ssy)[:, None] + torch.rand(size=(ssy, ssx))) * bsizey
            elif self.subsampletype is None:
                # no subsampling: the full pixel grid
                py, px = torch.meshgrid(torch.arange(height).float(), torch.arange(width).float())
            else:
                raise ValueError("unknown subsampletype: {}".format(self.subsampletype))

            result["pixelcoords"] = torch.stack([px, py], dim=-1)

    return result
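
For reference, a minimal sketch of how a dataset exposing this __getitem__ is typically consumed through a standard torch DataLoader. The class name and constructor arguments below (Dataset, keyfilter, subsampletype, subsamplesize) are assumptions inferred from the attributes referenced above; the real constructor signature is not part of this excerpt.

import torch.utils.data

# hypothetical construction: the argument names mirror the attributes that
# __getitem__ reads (self.keyfilter, self.subsampletype, ...), not the
# actual constructor
dataset = Dataset(
    keyfilter=["verts", "avgtex", "camera", "image", "pixelcoords"],
    subsampletype="stratified",
    subsamplesize=128)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=8)

for batch in dataloader:
    # the numpy arrays in `result` are collated into batched torch tensors,
    # e.g. batch["image"] is (4, 3, H, W) and batch["pixelcoords"] is
    # (4, subsamplesize, subsamplesize, 2); batch["imagevalid"] can be used
    # to mask out frames that failed to load
    pass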