in data/realestate10k.py [0:0]
def __getitem__(self, index):
    """Sample a random clip of ``self.num_views`` consecutive frames.

    The ``index`` argument is ignored: a sequence is drawn at random from
    ``self.imageset`` using ``self.rng``, then a random start frame is
    drawn inside that sequence.

    Each row of the per-sequence text file is read as:
      * column 0     -- integer used as the PNG file name of the frame
                        (presumably the frame timestamp -- TODO confirm),
      * columns 1:7  -- intrinsics, of which the first four are used as
                        ``fx, fy, cx, cy``,
      * columns 7:   -- extrinsics, reshaped to a 3x4 matrix.

    Returns:
        dict with
          * ``"images"``: list of ``self.input_transform(image)`` results,
            one per view,
          * ``"cameras"``: list of dicts, one per view, with keys ``"P"``
            (4x4 float32, ``offset @ K @ extrinsics`` padded with a
            ``[0, 0, 0, 1]`` row), ``"Pinv"`` (its inverse), ``"OrigP"``
            (the raw 3x4 extrinsics), and ``"K"`` / ``"Kinv"`` taken
            unchanged from ``self.K`` / ``self.invK``.
    """
    # The requested index is deliberately replaced by a random draw.
    index = self.rng.randint(self.imageset.shape[0])

    # Common prefix of both the frame-list file and the image directory.
    sequence_path = self.base_file + "/frames/%s/%s" % (
        self.dataset,
        self.imageset[index],
    )

    # ndmin=2 keeps a (num_frames, num_columns) layout even when the file
    # holds a single frame; without it loadtxt returns a 1-D row and both
    # ``shape[0]`` and ``frames[t, 0]`` below would be wrong.
    frames = np.loadtxt(sequence_path + ".txt", ndmin=2)
    num_frames = frames.shape[0]

    # NOTE(review): the last valid start (num_frames - num_views) is never
    # drawn here; kept as-is to preserve the sampling distribution.
    start = self.rng.choice(
        max(1, num_frames - self.num_views), size=(1,)
    )[0]

    # Consecutive frame indices, clamped so that sequences shorter than
    # num_views repeat their last frame instead of indexing out of range.
    image_indices = np.clip(
        start + np.arange(self.num_views), 0, num_frames - 1
    )

    # NOTE(review): this changes NumPy's process-wide print options and
    # looks like a debugging leftover; kept (once, not per view) so that
    # printing behaviour elsewhere is unchanged.
    np.set_printoptions(precision=3, suppress=True)

    rgbs = []
    cameras = []
    for t_index in image_indices:
        frame = frames[t_index]

        image = Image.open(sequence_path + "/" + str(int(frame[0])) + ".png")
        rgbs.append(self.input_transform(image))

        intrinsics = frame[1:7]
        extrinsics = frame[7:]

        origK = np.array(
            [
                [intrinsics[0], 0, intrinsics[2]],
                [0, intrinsics[1], intrinsics[3]],
                [0, 0, 1],
            ],
            dtype=np.float32,
        )
        K = np.matmul(self.offset, origK)
        origP = extrinsics.reshape(3, 4)

        # Merge these together to match habitat
        P = np.matmul(K, origP)
        # Pad to a homogeneous 4x4 matrix so it can be inverted.
        P = np.vstack((P, np.zeros((1, 4), dtype=np.float32))).astype(
            np.float32
        )
        P[3, 3] = 1
        Pinv = np.linalg.inv(P)

        cameras.append(
            {
                "P": P,
                "Pinv": Pinv,
                "OrigP": origP,
                "K": self.K,
                "Kinv": self.invK,
            }
        )

    return {"images": rgbs, "cameras": cameras}