# Excerpt: encode_multiview_pcl
# From: shap_e/models/transmitter/channels_encoder.py
    def encode_multiview_pcl(self, batch: AttrDict, use_distance: bool = True) -> torch.Tensor:
        """
        Embed multiview RGB + depth observations as per-view patch features.

        :param batch: AttrDict carrying ``views``, ``depths``, ``view_alphas``,
            and ``cameras`` fields (converted via the corresponding
            ``*_to_tensor`` helpers on this encoder).
        :param use_distance: when True, rescale the raw depths by the
            ray/camera-z dot product before unprojecting — presumably
            converting z-depth into distance along each ray (TODO confirm
            against the dataset's depth convention).
        :return: [batch_size, num_views, n_patches, width]
        """
        views = self.views_to_tensor(batch.views).to(self.device)
        alphas = self.view_alphas_to_tensor(batch.view_alphas).to(self.device)
        depths = self.raw_depths_to_tensor(batch.depths)
        # Pixels with alpha ~ 1 — looks like a foreground mask passed to
        # mv_pcl_embed; confirm its semantics there.
        fg_mask = alphas >= 0.999

        poses, camera_z = self.dense_pose_cameras_to_tensor(batch.cameras)
        poses = poses.permute(0, 1, 4, 5, 2, 3)
        origin = poses[:, :, 0]
        direction = poses[:, :, 1]

        if use_distance:
            # Per-pixel dot product of ray direction with the camera z-axis
            # (keepdim so it broadcasts against the depth map).
            ray_scale = torch.sum(direction * camera_z[..., None, None], dim=2, keepdim=True)
            depths = depths / ray_scale

        # Unproject every pixel to a 3D point along its ray.
        points = origin + depths * direction
        embedded = self.mv_pcl_embed(views, origin, points, fg_mask)

        batch_size, num_views = embedded.shape[0], embedded.shape[1]
        # Fold views into the batch dimension for the patch embedding conv.
        flat = embedded.reshape([batch_size * num_views, *embedded.shape[2:]])
        patches = self.patch_emb(flat)
        patches = patches.reshape([batch_size, num_views, self.width, -1])
        # -> [batch_size, num_views, n_patches, width]
        return patches.permute(0, 1, 3, 2).contiguous()