def compute_optimal_translation()

in phosa/pose_optimization.py [0:0]


def compute_optimal_translation(bbox_target, vertices, f=1, img_size=256):
    """
    Computes the optimal translation to align the mesh to a bounding box using
    least squares.

    Args:
        bbox_target (list): bounding box in xywh.
        vertices (B x V x 3): Batched vertices.
        f (float): Focal length.
        img_size (int): Image size in pixels.

    Returns:
        Optimal 3D translation (B x 3).
    """
    bbox_mask = np.array(bbox_target)
    mask_center = bbox_mask[:2] + bbox_mask[2:] / 2
    diag_mask = np.sqrt(bbox_mask[2] ** 2 + bbox_mask[3] ** 2)
    B = vertices.shape[0]
    x = torch.zeros(B).cuda()
    y = torch.zeros(B).cuda()
    z = 2.5 * torch.ones(B).cuda()
    for _ in range(50):
        translation = torch.stack((x, y, z), -1).unsqueeze(1)
        v = vertices + translation
        bbox_proj = compute_bbox_proj(v, f=1, img_size=img_size)
        diag_proj = torch.sqrt(torch.sum(bbox_proj[:, 2:] ** 2, 1))
        delta_z = z * (diag_proj / diag_mask - 1)
        z = z + delta_z
        proj_center = bbox_proj[:, :2] + bbox_proj[:, 2:] / 2
        x += (mask_center[0] - proj_center[:, 0]) * z / f / img_size
        y += (mask_center[1] - proj_center[:, 1]) * z / f / img_size
    return torch.stack((x, y, z), -1).unsqueeze(1)