# interaction_exploration/utils/map_util.py
def image_to_world(self, agent_pose, depth_inputs):
    """Back-project a depth image to per-pixel 3D world coordinates.

    Args:
        agent_pose: array-like with at least 5 entries:
            [0:3] agent (x, y, z) position in meters,
            [3]   rotation about the vertical axis in degrees,
            [4]   cameraHorizon (pitch) in degrees.
        depth_inputs: (H, W) numpy array of depths in meters.

    Returns:
        (1, 3, self.out_size, self.out_size) float tensor whose channels
        are the world-frame X, Y, Z coordinates of every pixel.
    """
    agent_pose = torch.Tensor(agent_pose).unsqueeze(0)          # (1, >=5)
    depth = torch.from_numpy(depth_inputs).unsqueeze(0)         # (1, H, W)
    # Resample the depth map to the working resolution.
    depth = F.interpolate(depth.unsqueeze(0), self.out_size,
                          mode='bilinear', align_corners=True)  # (1, 1, out, out)

    fx, fy, cx, cy = self.cam_params
    bs, _, imh, imw = depth.shape
    device = depth.device

    # Normalized pinhole image coordinates, shaped for broadcasting.
    x = torch.arange(0, imw).float().to(device).view(1, 1, 1, imw)
    y = torch.arange(0, imh).float().to(device).view(1, 1, imh, 1)
    xx = (x - cx) / fx
    yy = (y - cy) / fy

    # Camera-frame 3D points (meters); broadcasting gives (1, 1, imh, imw).
    Z = depth
    X = xx * Z
    Y = yy * Z
    P = torch.cat([X, Y, Z], 1)            # (1, 3, imh, imw)
    P = P.reshape(bs, 3, imh * imw)        # flatten pixels for matrix mult

    # Agent rotation is sometimes off by a degree (59 instead of 60);
    # integer floor-division snaps it into a rotation-table bin.
    # Rx corrects for cameraHorizon (pitch), Ry for yaw about vertical.
    Rx = self.rot_x[(-agent_pose[:, 4] // self.rot_size_x).long()]  # (1, 3, 3)
    Ry = self.rot_y[(agent_pose[:, 3] // self.rot_size_y).long()]   # (1, 3, 3)
    R = torch.bmm(Ry, Rx).to(device)                                # (1, 3, 3)

    # Translation: negate y because image y points down, world y up.
    # clone() so we do not mutate agent_pose through a view.
    P0 = agent_pose[:, 0:3].clone()
    P0[:, 1] = -P0[:, 1]
    P0 = P0.unsqueeze(-1)                  # (1, 3, 1)

    # Rigid transform into the world frame, then restore spatial layout.
    P = torch.bmm(R, P) + P0               # (1, 3, imh*imw)
    # BUG FIX: the original returned None, discarding the computed result.
    return P.reshape(bs, 3, imh, imw)