in mmf/datasets/builders/synsin_realestate10k/realestate10k.py [0:0]
def __getitem__(self, index):
    """Sample ``self.num_views`` frames of one video together with their cameras.

    The frame table ``<base_file>/<video>.txt`` is read with one row per
    frame.  As used below, column 0 is the frame id that names the PNG file,
    columns 1-6 hold the intrinsics (entries 0/1 go on the diagonal of the
    3x3 matrix, entries 2/3 in its last column) and the remaining 12 columns
    are a row-major 3x4 extrinsic matrix.

    A reference frame is drawn at random; the other views are drawn from
    nearby frames, preferring those whose pose change relative to the
    reference exceeds ``self.ANGLE_THRESH`` or ``self.TRANS_THRESH``.

    Args:
        index: Position of the video in ``self.imageset``.

    Returns:
        A dict with
        ``"images"``: list of ``self.num_views`` arrays, each the frame
        resized to ``self.W`` x ``self.W`` and passed through
        ``img_as_float``;
        ``"cameras"``: list of dicts holding a 4x4 float32 matrix ``"P"``;
        ``"video_id"``: ``"<index, zero-padded to 8 digits>_<video name>"``.
    """
    video_name = self.imageset[index]

    # ndmin=2 keeps the (num_frames, num_columns) layout even when the file
    # holds a single frame; otherwise loadtxt returns a 1-D row, shape[0]
    # counts columns and the 2-D indexing below fails.
    frames = np.loadtxt(f"{self.base_file}/{video_name}.txt", ndmin=2)
    num_frames = frames.shape[0]

    # Reference frame.
    image_index = self.rng.choice(num_frames, size=(1,))[0]

    # Draw 30 candidate frames at an offset in [-40, 40) from the reference
    # one, clamped to the valid frame range.
    image_indices = np.clip(
        self.rng.randint(80, size=(30,)) - 40 + image_index, 0, num_frames - 1
    )

    # Measure the pose change of every candidate w.r.t. the reference frame.
    # NOTE(review): get_deltas is defined elsewhere; presumably it returns
    # (rotation change, translation change) -- confirm.
    orig_viewpoint = frames[image_index, 7:].reshape(3, 4)
    angles = []
    translations = []
    for candidate in image_indices:
        new_viewpoint = frames[candidate, 7:].reshape(3, 4)
        dang, dtrans = get_deltas(orig_viewpoint, new_viewpoint)
        angles.append(dang)
        translations.append(dtrans)
    angles = np.array(angles)
    translations = np.array(translations)

    # Candidates whose change in angle or translation is large ("hard").
    hard_indices = image_indices[
        (angles > self.ANGLE_THRESH) | (translations > self.TRANS_THRESH)
    ]

    rgbs = []
    cameras = []
    for view in range(self.num_views):
        if view == 0:
            # The first view is always the reference frame.
            t_index = image_index
        elif hard_indices.shape[0] > 5:
            # Enough hard candidates: choose a harder pose change.
            t_index = hard_indices[self.rng.randint(hard_indices.shape[0])]
        else:
            t_index = image_indices[self.rng.randint(image_indices.shape[0])]

        image_path = (
            f"{self.base_file}/{video_name}/{int(frames[t_index, 0])}.png"
        )
        # The context manager releases the file handle once the resized copy
        # has been converted to an array.
        with Image.open(image_path) as image:
            rgbs.append(
                img_as_float(
                    np.array(image.resize([self.W, self.W], Image.BILINEAR))
                )
            )

        intrinsics = frames[t_index, 1:7]
        extrinsics = frames[t_index, 7:]
        origK = np.array(
            [
                [intrinsics[0], 0, intrinsics[2]],
                [0, intrinsics[1], intrinsics[3]],
                [0, 0, 1],
            ],
            dtype=np.float32,
        )
        K = np.matmul(self.offset, origK)
        origP = extrinsics.reshape(3, 4)

        # Merge intrinsics and extrinsics together to match habitat, then
        # extend the 3x4 product to a 4x4 matrix with last row [0, 0, 0, 1].
        P = np.matmul(K, origP)
        P = np.vstack((P, np.zeros((1, 4), dtype=np.float32))).astype(
            np.float32
        )
        P[3, 3] = 1

        cameras.append({"P": P})

    return {
        "images": rgbs,
        "cameras": cameras,
        "video_id": f"{index:08d}_{video_name}",
    }