def run_phi(self, kp_loc, kp_vis, class_mask=None)

in model.py


    def run_phi(self,
                kp_loc,
                kp_vis,
                class_mask=None,
                ):
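        # expected shapes (inferred from the code below):
        #   kp_loc:     (ba, 2, n_keypoints) 2D keypoint locations
        #   kp_vis:     (ba, n_keypoints)    per-keypoint visibility weights
        #   class_mask: optional (ba, n_keypoints) per-class keypoint mask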

        preds = {}

        # batch size
        ba = kp_loc.shape[0]
        dtype = kp_loc.type()
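        # (kp_loc.type() returns the legacy tensor-type string, e.g.
        # 'torch.FloatTensor', consumed by the .type(dtype) casts below)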

        kp_loc_orig = kp_loc.clone()

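        # optionally apply a random rotation about the z-axis to the input;
        # only the top-left 2x2 block of R_rand acts on the 2D keypoints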
        if self.z_augment and self.training:
            R_rand = rand_rot(ba,
                              dtype=dtype,
                              max_rot_angle=float(self.z_augment_rot_angle),
                              axes=(0, 0, 1))
            kp_loc_in = torch.bmm(R_rand[:, 0:2, 0:2], kp_loc)
        else:
            R_rand = torch.eye(3).type(dtype)[None].repeat((ba, 1, 1))
            kp_loc_in = kp_loc_orig

        if self.z_equivariance and self.training:
            # random rotation about the z-axis (i.e. within the xy-plane)
            R_rand_eq = rand_rot(ba,
                                 dtype=dtype,
                                 max_rot_angle=float(
                                     self.z_equivariance_rot_angle),
                                 axes=(0, 0, 1))
            kp_loc_in = torch.cat(
                (kp_loc_in,
                 torch.bmm(R_rand_eq[:, 0:2, 0:2], kp_loc_in)
                 ), dim=0)
            kp_vis_in = kp_vis.repeat((2, 1))
        else:
            kp_vis_in = kp_vis
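        # with z-equivariance the effective batch is doubled: rows [:ba] hold
        # the original inputs, rows [ba:] their z-rotated copies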

        # mask kp_loc by kp_visibility
        kp_loc_masked = kp_loc_in * kp_vis_in[:, None, :]

        # flatten the xy coordinates into one vector per sample
        kp_loc_flatten = kp_loc_masked.view(-1, 2*self.n_keypoints)

        # concatenate visibilities and kp locations
        l1_input = torch.cat((kp_loc_flatten, kp_vis_in), dim=1)
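        # l1_input: (ba_eff, 3*n_keypoints) = 2*n_keypoints flattened xy
        # coordinates followed by n_keypoints visibility flags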

        # pass to network
        if self.independent_phi_for_aug and l1_input.shape[0] == 2*ba:
            feats = torch.cat([self.phi(l1_[:, :, None, None]) for
                               l1_ in l1_input.split(ba, dim=0)], dim=0)
        else:
            feats = self.phi(l1_input[:, :, None, None])

        # coefficients of the linear shape basis
        shape_coeff = self.alpha_layer(feats)[:, :, 0, 0]

        if self.z_equivariance and self.training:
            # use the shape coeff from the second set of preds
            shape_coeff = shape_coeff[ba:]
            # take the feats from the first set
            feats = feats[:ba]
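            # mixing the two halves encourages the shape coefficients to be
            # invariant to the extra z-rotation of the input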

        # shape prediction is just a linear layer implemented as a conv
        shape_canonical = self.shape_layer(
            shape_coeff[:, :, None, None])[:, :, 0, 0]
        shape_canonical = shape_canonical.view(ba, 3, self.n_keypoints)

        if self.keypoint_norm_type == 'to_root':
            # make sure we fix the root at 0
            root_j = shape_canonical[:, :, self.root_joint]
            shape_canonical = shape_canonical - root_j[:, :, None]

        # predict camera params
        # ... log rotation (exponential representation)
        R_log = self.rot_layer(feats)[:, :, 0, 0]

        # convert the 3-vector to a 3x3 rotation matrix via the exponential
        # map (a Rodrigues-formula sketch follows this listing)
        R = so3_exponential_map(R_log)

        # T vector of the camera
        if self.camera_translation:
            T = self.translation_layer(feats)[:, :, 0, 0]
            if self.camera_xy_translation:  # zero out the z-component
                T = T * torch.tensor([1., 1., 0.]).type(dtype)[None, :]
        else:
            T = R_log.new_zeros(ba, 3)

        # offset the translation vector of the camera
        if self.depth_offset > 0.:
            T[:, 2] = T[:, 2] + self.depth_offset
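            # (presumably keeps the reconstructed shape at positive depth
            # in front of the camera)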

        # scale of the camera
        if self.camera_scale:
            scale = self.scale_layer(feats)[:, 0, 0, 0]
        else:
            scale = R_log.new_ones(ba)

        # map the rotated+scaled shape into the camera: Y = s*R*X + T
        # (a sketch of apply_similarity_t follows this listing)
        shape_camera_coord = self.apply_similarity_t(
            shape_canonical, R, T, scale)

        # undo the random z-rotation: R_rand is orthonormal, so its transpose
        # is its inverse; predictions return to the un-augmented frame
        if (self.z_equivariance or self.z_augment) and self.training:
            R_rand_inv = R_rand.transpose(2, 1)
            R = torch.bmm(R_rand_inv, R)
            T = torch.bmm(R_rand_inv, T[:, :, None])[:, :, 0]
            shape_camera_coord = torch.bmm(R_rand_inv, shape_camera_coord)

        # estimate translation
        if self.argmin_translation:
            assert self.projection_type == 'orthographic'
            projection, _ = self.camera_projection(shape_camera_coord)
            T_amin = argmin_translation(projection, kp_loc_orig, v=kp_vis)
            T_amin = Fu.pad(T_amin, (0, 1), 'constant', float(0))
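            # pad the 2D translation with a zero z-component; Fu is assumed
            # to be torch.nn.functional (a closed-form sketch of
            # argmin_translation follows this listing)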
            shape_camera_coord = shape_camera_coord + T_amin[:, :, None]
            T = T + T_amin

        if class_mask is not None:
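            # zero out keypoints that are not active for this sample's class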
            shape_camera_coord = shape_camera_coord * class_mask[:, None, :]
            shape_canonical = shape_canonical * class_mask[:, None, :]

        preds['R_log'] = R_log
        preds['R'] = R
        preds['scale'] = scale
        preds['T'] = T
        preds['shape_camera_coord'] = shape_camera_coord
        preds['shape_coeff'] = shape_coeff
        preds['shape_canonical'] = shape_canonical

        return preds
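
The helper so3_exponential_map maps the predicted 3-vector R_log to a rotation
matrix. The repository ships its own implementation; the following is only a
minimal sketch of the underlying Rodrigues formula in plain PyTorch (the
function name and the epsilon guard are assumptions, not the repository's code):

    import torch

    def so3_exponential_map_sketch(log_rot, eps=1e-8):
        """Rodrigues sketch: (ba, 3) axis-angle -> (ba, 3, 3) rotation matrices."""
        ba = log_rot.shape[0]
        # rotation angle = norm of the axis-angle vector, guarded against zero
        theta = log_rot.norm(dim=1).clamp(min=eps)
        # skew-symmetric "hat" matrices of the (unnormalized) log rotations
        K = log_rot.new_zeros(ba, 3, 3)
        K[:, 0, 1], K[:, 0, 2] = -log_rot[:, 2], log_rot[:, 1]
        K[:, 1, 0], K[:, 1, 2] = log_rot[:, 2], -log_rot[:, 0]
        K[:, 2, 0], K[:, 2, 1] = -log_rot[:, 1], log_rot[:, 0]
        eye = torch.eye(3, dtype=log_rot.dtype, device=log_rot.device)[None]
        # R = I + sin(t)/t * K + (1 - cos(t))/t^2 * K @ K
        a = (torch.sin(theta) / theta)[:, None, None]
        b = ((1.0 - torch.cos(theta)) / theta ** 2)[:, None, None]
        return eye + a * K + b * torch.bmm(K, K)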
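
apply_similarity_t realizes the similarity transform Y = s*R*X + T from the
listing. A minimal sketch, assuming X is (ba, 3, n_keypoints), T is (ba, 3),
and the per-sample scale s broadcasts over all keypoints:

    import torch

    def apply_similarity_t_sketch(X, R, T, s):
        """Y = s * R @ X + T; X: (ba, 3, K), R: (ba, 3, 3), T: (ba, 3), s: (ba,)."""
        return s[:, None, None] * torch.bmm(R, X) + T[:, :, None]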
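
argmin_translation finds the in-plane translation that best aligns the
orthographic projection with the observed keypoints. For the weighted least
squares sum_i v_i * ||p_i + t - y_i||^2, the minimizer t* is the
visibility-weighted mean of the residuals y_i - p_i, which suggests this
closed-form sketch (the signature is inferred from the call site):

    import torch

    def argmin_translation_sketch(projection, target, v, eps=1e-8):
        """Optimal 2D shift; projection, target: (ba, 2, K); v: (ba, K) weights."""
        residual = target - projection                  # (ba, 2, K)
        w = v[:, None, :]                               # (ba, 1, K)
        # closed form: weighted mean residual over the visible keypoints
        return (w * residual).sum(dim=2) / w.sum(dim=2).clamp(min=eps)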