in phosa/pose_optimization.py [0:0]
def compute_optimal_translation(
    bbox_target, vertices, f=1, img_size=256, num_iters=50, init_depth=2.5
):
    """
    Computes a translation that aligns the projected mesh to a bounding box.

    Starting from the translation (0, 0, init_depth), a fixed-point iteration
    repeatedly (1) rescales the depth so that the diagonal of the projected
    bounding box matches the diagonal of the target box, and (2) shifts x and
    y so that the projected box center moves onto the target box center.

    Args:
        bbox_target (list): Target bounding box in xywh.
        vertices (B x V x 3): Batched vertices.
        f (float): Focal length. Used both for projecting the vertices and for
            converting the pixel offset of the box centers back to 3D.
        img_size (int): Image size in pixels.
        num_iters (int): Number of refinement iterations.
        init_depth (float): Initial z translation for every batch element.

    Returns:
        Optimal 3D translation (B x 1 x 3), allocated on the same device as
        `vertices`. The singleton axis lets it broadcast against the vertices.
    """
    bbox = np.asarray(bbox_target)
    target_center = bbox[:2] + bbox[2:] / 2
    # Plain Python floats avoid mixing NumPy scalars into torch arithmetic.
    target_cx = float(target_center[0])
    target_cy = float(target_center[1])
    target_diag = float(np.sqrt(bbox[2] ** 2 + bbox[3] ** 2))

    batch_size = vertices.shape[0]
    # Allocate next to the input instead of forcing CUDA, so CPU tensors work.
    device = vertices.device
    x = torch.zeros(batch_size, device=device)
    y = torch.zeros(batch_size, device=device)
    z = init_depth * torch.ones(batch_size, device=device)

    for _ in range(num_iters):
        translation = torch.stack((x, y, z), -1).unsqueeze(1)
        # NOTE(review): compute_bbox_proj is assumed to return B x 4 boxes in
        # xywh, matching how the result is sliced below -- confirm.
        # The caller's focal length is forwarded (it used to be pinned to 1)
        # so the projection agrees with the back-projection further down.
        bbox_proj = compute_bbox_proj(
            vertices + translation, f=f, img_size=img_size
        )

        # Depth update: relative mismatch between the two box diagonals.
        diag_proj = torch.sqrt(torch.sum(bbox_proj[:, 2:] ** 2, 1))
        z = z + z * (diag_proj / target_diag - 1)

        # Lateral update: the center offset in pixels is scaled by
        # z / (f * img_size); the freshly updated depth is used on purpose.
        proj_center = bbox_proj[:, :2] + bbox_proj[:, 2:] / 2
        x += (target_cx - proj_center[:, 0]) * z / f / img_size
        y += (target_cy - proj_center[:, 1]) * z / f / img_size

    return torch.stack((x, y, z), -1).unsqueeze(1)