in shap_e/models/transmitter/channels_encoder.py [0:0]
def encode_multiview_pcl(self, batch: AttrDict, use_distance: bool = True) -> torch.Tensor:
"""
:return: [batch_size, num_views, n_patches, width]
"""
    all_views = self.views_to_tensor(batch.views).to(self.device)
    depths = self.raw_depths_to_tensor(batch.depths)
    all_view_alphas = self.view_alphas_to_tensor(batch.view_alphas).to(self.device)
    mask = all_view_alphas >= 0.999
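    # Per-pixel camera rays. After the permute, dense_poses is laid out as
    # [batch, views, 2, 3, height, width]; index 0 along the ray axis holds
    # origins and index 1 holds directions (layout inferred from the permute
    # and the indexing below).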
    dense_poses, camera_z = self.dense_pose_cameras_to_tensor(batch.cameras)
    dense_poses = dense_poses.permute(0, 1, 4, 5, 2, 3)
    origin, direction = dense_poses[:, :, 0], dense_poses[:, :, 1]
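    # ray_depth_factor is the per-pixel dot product of ray direction and camera
    # z-axis. For unit-length directions this is the cosine to the optical
    # axis, so the division converts z-buffer depths into distances along each
    # ray (interpretation assumed; the source does not spell it out).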
    if use_distance:
        ray_depth_factor = torch.sum(direction * camera_z[..., None, None], dim=2, keepdim=True)
        depths = depths / ray_depth_factor
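    # Unproject every pixel to a world-space point, then embed RGB, ray origin,
    # and 3D position per pixel with the multiview point-cloud embedder.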
    position = origin + depths * direction
    all_view_poses = self.mv_pcl_embed(all_views, origin, position, mask)
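    # Fold the view axis into the batch axis so patch_emb sees standard
    # [N, C, H, W] inputs, then split it back out and flatten the patch grid.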
    batch_size, num_views, _, _, _ = all_view_poses.shape
    views_proj = self.patch_emb(
        all_view_poses.reshape([batch_size * num_views, *all_view_poses.shape[2:]])
    )
    views_proj = (
        views_proj.reshape([batch_size, num_views, self.width, -1])
        .permute(0, 1, 3, 2)
        .contiguous()
    )  # [batch_size x num_views x n_patches x width]
    return views_proj
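
# ---------------------------------------------------------------------------
# Illustration (not part of the original file): a self-contained sketch of the
# unprojection step above, with synthetic tensors standing in for the output
# of `dense_pose_cameras_to_tensor`. The helper name and all shapes are
# assumptions chosen to mirror the method, not the actual shap-e API.
# ---------------------------------------------------------------------------
import torch


def unproject_depths(
    origin: torch.Tensor,     # [B, V, 3, H, W] per-pixel ray origins
    direction: torch.Tensor,  # [B, V, 3, H, W] per-pixel unit ray directions
    depths: torch.Tensor,     # [B, V, 1, H, W] raw depth maps
    camera_z: torch.Tensor,   # [B, V, 3] camera forward (z) axes
    use_distance: bool = True,
) -> torch.Tensor:
    """Return [B, V, 3, H, W] world-space points, mirroring the logic above."""
    if use_distance:
        # Per-pixel dot product of ray direction and camera z-axis; dividing
        # the depth by it rescales z-depths into distances along each ray.
        ray_depth_factor = torch.sum(
            direction * camera_z[..., None, None], dim=2, keepdim=True
        )
        depths = depths / ray_depth_factor
    return origin + depths * direction


# Smoke test on random data: one scene, two views, 8x8 depth maps.
B, V, H, W = 1, 2, 8, 8
direction = torch.nn.functional.normalize(torch.randn(B, V, 3, H, W), dim=2)
origin = torch.zeros(B, V, 3, H, W)
depths = torch.rand(B, V, 1, H, W) + 0.5
camera_z = torch.nn.functional.normalize(torch.randn(B, V, 3), dim=-1)
points = unproject_depths(origin, direction, depths, camera_z)
assert points.shape == (B, V, 3, H, W)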
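
# ---------------------------------------------------------------------------
# Illustration (not part of the original file): the patchify/reshape at the
# end of the method, assuming `patch_emb` is a ViT-style strided Conv2d that
# turns each non-overlapping patch into a `width`-dim token. The module choice
# and all sizes here are assumptions for the sketch.
# ---------------------------------------------------------------------------
import torch.nn as nn

B, V, C, H, W = 2, 4, 3, 64, 64  # batch, views, channels, image size
width, patch_size = 256, 16
patch_emb = nn.Conv2d(C, width, kernel_size=patch_size, stride=patch_size)

x = torch.randn(B, V, C, H, W)
# Fold views into the batch axis so the conv sees ordinary [N, C, H, W] input.
tokens = patch_emb(x.reshape(B * V, C, H, W))  # [B*V, width, H/ps, W/ps]
tokens = (
    tokens.reshape(B, V, width, -1)  # flatten the patch grid into one axis
    .permute(0, 1, 3, 2)             # -> [B, V, n_patches, width]
    .contiguous()
)
assert tokens.shape == (B, V, (H // patch_size) * (W // patch_size), width)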