def encode_dense_pose_views()

in shap_e/models/transmitter/channels_encoder.py [0:0]


    def encode_dense_pose_views(self, batch: AttrDict) -> torch.Tensor:
        """
        Embed every view together with its dense per-pixel camera pose and
        project the result into patch tokens.

        :return: [batch_size, num_views, n_patches, width]
        """
        views = self.views_to_tensor(batch.views).to(self.device)
        if self.use_depth:
            # Depth maps are appended as extra channels (dim=2) of each view.
            views = torch.cat([views, self.depths_to_tensor(batch.depths)], dim=2)

        poses, _ = self.dense_pose_cameras_to_tensor(batch.cameras)
        # Bring the (position, direction) pair and its channel axis in front
        # of the spatial axes, then split the pair apart.
        poses = poses.permute(0, 1, 4, 5, 2, 3)
        positions = poses[:, :, 0]
        directions = poses[:, :, 1]
        embedded = self.mv_pose_embed(views, positions, directions)

        batch_size, num_views, _c, _h, _w = embedded.shape

        # Fold batch and view axes together so patch_emb sees one image per row.
        flat = embedded.reshape(batch_size * num_views, *embedded.shape[2:])
        patches = self.patch_emb(flat)

        # [batch_size, num_views, width, n_patches] -> [batch_size, num_views, n_patches, width]
        patches = patches.reshape(batch_size, num_views, self.width, -1)
        return patches.permute(0, 1, 3, 2).contiguous()