in data/realestate10k.py [0:0]
def __getitem_simple__(self, index):
index = self.rng.randint(self.imageset.shape[0])
# index = index % self.imageset.shape[0]
# Load text file containing frame information
frames = np.loadtxt(
self.base_file
+ "/frames/%s/%s.txt" % (self.dataset, self.imageset[index])
)
image_index = self.rng.choice(frames.shape[0], size=(1,))[0]
rgbs = []
cameras = []
for i in range(0, self.num_views):
t_index = max(
min(
image_index + self.rng.randint(16) - 8, frames.shape[0] - 1
),
0,
)
image = Image.open(
self.base_file
+ "/frames/%s/%s/" % (self.dataset, self.imageset[index])
+ str(int(frames[t_index, 0]))
+ ".png"
)
rgbs += [self.input_transform(image)]
intrinsics = frames[t_index, 1:7]
extrinsics = frames[t_index, 7:]
origK = np.array(
[
[intrinsics[0], 0, intrinsics[2]],
[0, intrinsics[1], intrinsics[3]],
[0, 0, 1],
],
dtype=np.float32,
)
K = np.matmul(self.offset, origK)
origP = extrinsics.reshape(3, 4)
P = np.matmul(K, origP) # Merge these together to match habitat
P = np.vstack((P, np.zeros((1, 4), dtype=np.float32))).astype(
np.float32
)
P[3, 3] = 1
Pinv = np.linalg.inv(P)
cameras += [
{
"P": P,
"OrigP": origP,
"Pinv": Pinv,
"K": self.K,
"Kinv": self.invK,
}
]
return {"images": rgbs, "cameras": cameras}