in datasets/sunrgbd.py [0:0]
def __getitem__(self, idx):
    """Load one scan and encode its 3D boxes as fixed-size training targets.

    Reads ``<scan_path>_pc.npz`` (array stored under key ``"pc"``) and
    ``<scan_path>_bbox.npy``, optionally applies random augmentation, then
    pads every per-box quantity to ``self.max_num_obj`` rows.

    Box rows are read as ``[0:3]`` centre, ``[3:6]`` size terms that are
    doubled to obtain the full box size, ``[6]`` heading angle and ``[7]``
    semantic class.

    Args:
        idx: Index into ``self.scan_names``.

    Returns:
        dict with the keys ``point_clouds``, ``gt_box_corners``,
        ``gt_box_centers``, ``gt_box_centers_normalized``,
        ``gt_box_sem_cls_label``, ``gt_box_present``, ``scan_idx``,
        ``gt_box_sizes``, ``gt_box_sizes_normalized``, ``gt_box_angles``,
        ``gt_angle_class_label``, ``gt_angle_residual_label``,
        ``point_cloud_dims_min`` and ``point_cloud_dims_max``.
        Per-box arrays have ``self.max_num_obj`` rows; rows beyond the real
        box count are zero and flagged 0 in ``gt_box_present``.

    Raises:
        ValueError: If the boxes are not a ``(K, 8)`` array with
            ``K <= self.max_num_obj``.
    """
    scan_name = self.scan_names[idx]
    # Absolute scan names are used as-is; anything else is relative to data_path.
    if scan_name.startswith("/"):
        scan_path = scan_name
    else:
        scan_path = os.path.join(self.data_path, scan_name)
    point_cloud = np.load(scan_path + "_pc.npz")["pc"]  # Nx6
    bboxes = np.load(scan_path + "_bbox.npy")  # K,8

    if not self.use_color:
        point_cloud = point_cloud[:, 0:3]
    else:
        assert point_cloud.shape[1] == 6
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = point_cloud[:, 3:] - MEAN_COLOR_RGB

    if self.use_height:
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile,
        # i.e. a near-minimum z value used as the floor estimate.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1
        )  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    # NOTE: the order of np.random calls below is kept fixed so that seeded
    # runs stay reproducible.
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat = pc_util.rotz(rot_angle)
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
        bboxes[:, 6] -= rot_angle

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (
                1 + 0.4 * np.random.random(3) - 0.2
            )  # brightness change for each channel
            rgb_color += (
                0.1 * np.random.random(3) - 0.05
            )  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1
            )  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1
            )
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio

        if self.use_height:
            # The height channel is the last column and scales with the scene.
            point_cloud[:, -1] *= scale_ratio[0, 0]

        if self.use_random_cuboid:
            point_cloud, bboxes, _ = self.random_cuboid_augmentor(
                point_cloud, bboxes
            )

    # ------------------------------- LABELS ------------------------------
    if (
        bboxes.ndim != 2
        or bboxes.shape[1] != 8
        or bboxes.shape[0] > self.max_num_obj
    ):
        raise ValueError(
            f"expected boxes of shape (K, 8) with K <= {self.max_num_obj}, "
            f"got {bboxes.shape}"
        )
    num_boxes = bboxes.shape[0]

    angle_classes = np.zeros((self.max_num_obj,), dtype=np.float32)
    angle_residuals = np.zeros((self.max_num_obj,), dtype=np.float32)
    raw_sizes = np.zeros((self.max_num_obj, 3), dtype=np.float32)
    target_bboxes_mask = np.zeros((self.max_num_obj))
    target_bboxes_mask[0:num_boxes] = 1
    target_bboxes = np.zeros((self.max_num_obj, 6))

    for i, bbox in enumerate(bboxes):
        raw_sizes[i, :] = bbox[3:6] * 2
        angle_classes[i], angle_residuals[i] = self.dataset_config.angle2class(
            bbox[6]
        )
        corners_3d = self.dataset_config.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6]
        )
        # compute axis aligned box: (cx, cy, cz, dx, dy, dz)
        corner_min = np.min(corners_3d[:, 0:3], axis=0)
        corner_max = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
        target_bboxes[i, 3:6] = corner_max - corner_min

    # The sampled indices are not needed here; the call is kept unchanged.
    point_cloud, _ = pc_util.random_sampling(
        point_cloud, self.num_points, return_choices=True
    )

    # Spatial extent is taken over xyz only: box sizes and centres have three
    # columns, so colour/height channels must not enter their normalisation.
    point_cloud_dims_min = point_cloud[:, 0:3].min(axis=0)
    point_cloud_dims_max = point_cloud[:, 0:3].max(axis=0)

    mult_factor = point_cloud_dims_max - point_cloud_dims_min
    box_sizes_normalized = scale_points(
        raw_sizes.astype(np.float32)[None, ...],
        mult_factor=1.0 / mult_factor[None, ...],
    )
    box_sizes_normalized = box_sizes_normalized.squeeze(0)

    box_centers = target_bboxes.astype(np.float32)[:, 0:3]
    box_centers_normalized = shift_scale_points(
        box_centers[None, ...],
        src_range=[
            point_cloud_dims_min[None, ...],
            point_cloud_dims_max[None, ...],
        ],
        dst_range=self.center_normalizing_range,
    )
    box_centers_normalized = box_centers_normalized.squeeze(0)
    # Zero out the padded rows so they do not carry a normalised offset.
    box_centers_normalized = box_centers_normalized * target_bboxes_mask[..., None]

    # re-encode angles to be consistent with VoteNet eval
    # (the returned angles come from the class/residual encoding, not from
    # the raw heading stored in the box file)
    angle_classes = angle_classes.astype(np.int64)
    angle_residuals = angle_residuals.astype(np.float32)
    raw_angles = self.dataset_config.class2angle_batch(
        angle_classes, angle_residuals
    )

    box_corners = self.dataset_config.box_parametrization_to_corners_np(
        box_centers[None, ...],
        raw_sizes.astype(np.float32)[None, ...],
        raw_angles.astype(np.float32)[None, ...],
    )
    box_corners = box_corners.squeeze(0)

    target_bboxes_semcls = np.zeros((self.max_num_obj))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {
        "point_clouds": point_cloud.astype(np.float32),
        "gt_box_corners": box_corners.astype(np.float32),
        "gt_box_centers": box_centers.astype(np.float32),
        "gt_box_centers_normalized": box_centers_normalized.astype(np.float32),
        "gt_box_sem_cls_label": target_bboxes_semcls.astype(np.int64),
        "gt_box_present": target_bboxes_mask.astype(np.float32),
        "scan_idx": np.array(idx).astype(np.int64),
        "gt_box_sizes": raw_sizes.astype(np.float32),
        "gt_box_sizes_normalized": box_sizes_normalized.astype(np.float32),
        "gt_box_angles": raw_angles.astype(np.float32),
        "gt_angle_class_label": angle_classes,
        "gt_angle_residual_label": angle_residuals,
        "point_cloud_dims_min": point_cloud_dims_min,
        "point_cloud_dims_max": point_cloud_dims_max,
    }
    return ret_dict