multi_obj_pose_estimation/train_multi.py
def eval(niter, datacfg):
    def truths_length(truths):
        # Targets are padded to 50 rows; the first row whose x-coordinate
        # (column 1) is 0 marks the end of the real ground-truth entries
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50  # all 50 slots are occupied
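    # A note on the assumed label layout (one row per object, num_labels
    # columns): column 0 is the class id and columns 1..2*num_keypoints are
    # normalized keypoint coordinates, e.g.
    #   truths[k] = [cls, x0, y0, x1, y1, ..., x8, y8, ...]
    # so a zero in column 1 can only come from an all-zero padding row.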
# Parse configuration files
options = read_data_cfg(datacfg)
valid_images = options['valid']
meshname = options['mesh']
backupdir = options['backup']
name = options['name']
# Read object model information, get 3D bounding box corners
mesh = MeshPly(meshname)
vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
corners3D = get_3D_corners(vertices)
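    # A sketch of what get_3D_corners is assumed to return: the 8 corners of
    # the axis-aligned bounding box of the mesh, in homogeneous coordinates
    # (a 4x8 array), e.g.
    #   min_x, max_x = np.min(vertices[0, :]), np.max(vertices[0, :])
    #   (likewise for y and z), then stack the 8 (x, y, z) combinations as
    #   columns and append a row of ones.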
# Read intrinsic camera parameters
internal_calibration = get_camera_intrinsic(u0, v0, fx, fy)
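    # get_camera_intrinsic presumably assembles the standard 3x3 pinhole
    # calibration matrix from the four intrinsics (a sketch of the layout):
    #   K = np.array([[fx, 0.0, u0],
    #                 [0.0, fy, v0],
    #                 [0.0, 0.0, 1.0]], dtype='float32')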
# Get validation file names
with open(valid_images) as fp:
tmp_files = fp.readlines()
valid_files = [item.rstrip() for item in tmp_files]
    # Set the model (constructed, loaded with pretrained weights and wrapped
    # in DataParallel on the GPU by the caller) to evaluation mode
    model.eval()
# Get the parser for the test dataset
valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.module.width, model.module.height),
shuffle=False,
objclass=name,
transform=transforms.Compose([
transforms.ToTensor(),
]))
valid_batchsize = 1
    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
kwargs = {'num_workers': 4, 'pin_memory': True}
test_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)
# Parameters
num_classes = model.module.num_classes
anchors = model.module.anchors
num_anchors = model.module.num_anchors
testing_error_pixel = 0.0
testing_samples = 0.0
errs_2d = []
logging(" Number of test samples: %d" % len(test_loader.dataset))
# Iterate through test examples
for batch_idx, (data, target) in enumerate(test_loader):
t1 = time.time()
# Pass the data to GPU
if use_cuda:
data = data.cuda()
target = target.cuda()
        # Wrap tensors in Variable with volatile=True (legacy autograd
        # inference mode: no history is recorded, minimizing memory use)
        data = Variable(data, volatile=True)
t2 = time.time()
        # Forward pass
output = model(data).data
t3 = time.time()
        # Using the confidence threshold, eliminate low-confidence predictions;
        # int(trgt[0][0]) is the class id of the first ground-truth object
        trgt = target[0].view(-1, num_labels)
all_boxes = get_multi_region_boxes(output, conf_thresh, num_classes, num_keypoints, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
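        # Assumed layout of each entry in all_boxes[i], matching the indexing
        # below: 2*num_keypoints normalized keypoint coordinates, then
        #   box[2*num_keypoints]     -> detection confidence
        #   box[2*num_keypoints + 1] -> class score
        #   box[2*num_keypoints + 2] -> predicted class id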
t4 = time.time()
# Iterate through all batch elements
for i in range(output.size(0)):
# For each image, get all the predictions
boxes = all_boxes[i]
# For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
truths = target[i].view(-1, num_labels)
# Get how many objects are present in the scene
num_gts = truths_length(truths)
# Iterate through each ground-truth object
for k in range(num_gts):
box_gt = list()
for j in range(1, num_labels):
box_gt.append(truths[k][j])
box_gt.extend([1.0, 1.0])
box_gt.append(truths[k][0])
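                # box_gt now mirrors the prediction layout: 2*num_keypoints
                # keypoint coordinates, two dummy confidences (1.0, 1.0) and
                # the class id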
                # Among the predictions of the same class, choose the one with the highest confidence
best_conf_est = -sys.maxsize
for j in range(len(boxes)):
if (boxes[j][2*num_keypoints] > best_conf_est) and (boxes[j][2*num_keypoints+2] == int(truths[k][0])):
best_conf_est = boxes[j][2*num_keypoints]
box_pr = boxes[j]
                match = corner_confidence(box_gt[:2*num_keypoints], torch.FloatTensor(boxes[j][:2*num_keypoints]))  # computed but not used below
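                # corner_confidence presumably scores how close the predicted
                # keypoints are to the ground truth with the paper's sharp
                # exponential (a sketch, with distance threshold d_th and
                # sharpness alpha as assumed hyperparameters):
                #   d_k = ||gt_k - pr_k||                        # per-keypoint distance
                #   c_k = exp(alpha * (1 - d_k/d_th)) if d_k < d_th else 0
                #   confidence = mean of c_k over the keypoints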
# Denormalize the corner predictions
corners2D_gt = np.array(np.reshape(box_gt[:2*num_keypoints], [num_keypoints, 2]), dtype='float32')
corners2D_pr = np.array(np.reshape(box_pr[:2*num_keypoints], [num_keypoints, 2]), dtype='float32')
corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION
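                # fix_corner_order is assumed to permute the annotated corners
                # into the network's corner ordering, since the OCCLUSION
                # ground truth enumerates the 3D box corners in a different order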
                # Compute [R|t] by PnP
                # 3D reference points: the object centroid (origin of the model
                # frame) followed by the 8 corners of the 3D bounding box
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')
K = np.array(internal_calibration, dtype='float32')
R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)
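                # A minimal sketch of the assumed pnp helper: wrap OpenCV's
                # solvePnP with zero distortion and convert the Rodrigues
                # vector into a rotation matrix:
                #   import cv2
                #   def pnp(points_3D, points_2D, K):
                #       _, rvec, t = cv2.solvePnP(points_3D, points_2D, K,
                #                                 np.zeros((8, 1), dtype='float32'))
                #       R, _ = cv2.Rodrigues(rvec)
                #       return R, t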
# Compute pixel error
Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                # Projected 3D box corners (kept for debugging; not used in the pixel metric below)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
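                # compute_projection is assumed to apply the pinhole model
                # x = K [R|t] X and dehomogenize (a sketch):
                #   def compute_projection(points_3D, Rt, K):
                #       cam = K.dot(Rt).dot(points_3D)  # 3xN; points_3D is 4xN homogeneous
                #       return cam[:2, :] / cam[2, :]   # 2xN pixel coordinates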
                # Mean 2D reprojection error: average pixel distance between the
                # ground-truth and predicted projections of all mesh vertices
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
errs_2d.append(pixel_dist)
# Sum errors
testing_error_pixel += pixel_dist
testing_samples += 1
t5 = time.time()
# Compute 2D reprojection score
eps = 1e-5
for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
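    # The 2D projection metric counts a pose as correct when its mean
    # reprojection error falls within the pixel threshold; e.g. with
    # errs_2d = [3.1, 7.4, 4.9], the 5 px accuracy is 2/3 ~ 66.67%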
    # Print the timing breakdown of the last batch (set to False to silence)
    if True:
logging('-----------------------------------')
logging(' tensor to cuda : %f' % (t2 - t1))
logging(' predict : %f' % (t3 - t2))
logging('get_region_boxes : %f' % (t4 - t3))
logging(' eval : %f' % (t5 - t4))
logging(' total : %f' % (t5 - t1))
logging('-----------------------------------')
# Register losses and errors for saving later on
testing_iters.append(niter)
testing_errors_pixel.append(testing_error_pixel/(float(testing_samples)+eps))
    testing_accuracies.append(acc)  # accuracy at the last threshold above (50 px)