def extract_patch_data()

in tools/data_prepare/patch_data_prepare_val.py


# Module-level imports this function relies on; the project-local helpers
# (ROOT_DIR, KittiDataset, get_affine_transform, affine_transform,
# random_shift_box2d) are assumed to be imported from elsewhere in the repo.
import os
import pickle

import numpy as np
import torch
import torchvision
import tqdm

def extract_patch_data(split, output_filename,
                         perturb_box2d=False, augment_times=1, whitelist=['Car']):
    ''' Extract depth patches and the corresponding annotations
        generated from 2D bounding boxes
        (as given in the KITTI 3D box label files)

    Input:
        split: string, either training or testing
        output_filename: string, the name of the output .pickle file
        perturb_box2d: bool, whether to perturb the box2d
            (used for data augmentation on the train set)
        augment_times: scalar, how many augmentations to generate for each 2D box
        whitelist: a list of strings, the object types we are interested in
    Output:
        None (writes a .pickle file to disk)
    '''
    data_dir = os.path.join(ROOT_DIR, 'data')
    dataset = KittiDataset(root_dir=data_dir, split=split)
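    # per-frame pickles with precomputed backbone features plus the affine
    # transform metadata that was used when they were generated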
    feat_dir = '../../data/KITTI/pickle_files/org'

    # per-sample accumulators: one entry per (object, augmentation) pair
    patch_xyz_list = []
    patch_rgb_list = []
    type_list = []
    heading_list = []
    box3d_center_list = []
    box3d_size_list = []
    frustum_angle_list = []
    feats_list = []


    progress_bar = tqdm.tqdm(total=len(dataset.idx_list), leave=True, desc='%s split patch data gen' % split)
    for data_idx in dataset.idx_list: # image idx
        data_idx = int(data_idx)
        calib = dataset.get_calib(data_idx)
        objects = dataset.get_label(data_idx)

        feature_path = os.path.join(feat_dir, '%06d.pickle' % data_idx)
        with open(feature_path, 'rb') as f:
            datas = pickle.load(f)
        features = datas['features'].data.cpu()
        trans_out = datas['trans']
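        # c: crop center, s: scale, (out_h, out_w): feature-map height and width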
        c = trans_out[0]
        s = trans_out[1]
        out_h = trans_out[2]
        out_w = trans_out[3]
        trans_output = get_affine_transform(c, s, 0, [out_w, out_h])

        # compute x,y,z for each pixel in depth map
        depth = dataset.get_depth(data_idx)
        image = dataset.get_image(data_idx)
        assert depth.size == image.size
        width, height = depth.size
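        # KITTI depth maps are uint16 PNGs storing depth in meters scaled by 256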
        depth = np.array(depth).astype(np.float32) / 256
        # build a (v, u) grid of pixel coordinates with depth as the third channel
        uvdepth = np.zeros((height, width, 3), dtype=np.float32)
        uvdepth[:, :, 0], uvdepth[:, :, 1] = np.meshgrid(np.arange(width), np.arange(height))
        uvdepth[:, :, 2] = depth
        uvdepth = uvdepth.reshape(-1, 3)
        xyz = calib.img_to_rect(uvdepth[:, 0], uvdepth[:, 1], uvdepth[:, 2])  # rect coord sys
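        # img_to_rect inverts the camera pinhole projection, lifting each
        # (u, v, depth) pixel to a 3D point in the rectified camera frame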
        xyz = xyz.reshape(height, width, 3)  # record xyz, data type: float32
        uvdepth = uvdepth.reshape(height, width, 3)
        rgb = np.array(image)

        for obj in objects:
            if obj.cls_type not in whitelist:
                continue

            # get 2d box from ground truth
            box2d = obj.box2d
            for _ in range(augment_times):
                # augment data by box2d perturbation
                if perturb_box2d:
                    xmin, ymin, xmax, ymax = random_shift_box2d(box2d)
                else:
                    xmin, ymin, xmax, ymax = box2d

                # Get frustum angle (according to center pixel in 2D BOX)
                box2d_center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0])
                uvdepth = np.zeros((1, 3))
                uvdepth[0, 0:2] = box2d_center
                uvdepth[0, 2] = 20  # arbitrary depth, used only to place a point on the center viewing ray
                box2d_center_rect = calib.img_to_rect(uvdepth[:, 0], uvdepth[:, 1], uvdepth[:, 2])
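                # angle of the frustum center ray in the camera's x-z plane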
                frustum_angle = -1 * np.arctan2(box2d_center_rect[0,2], box2d_center_rect[0,0])

                # skip objects beyond the 'Hard' difficulty level
                if obj.level > 3:
                    continue

                xmin, ymin = max(xmin, 0), max(ymin, 0)   # check range
                xmax, ymax = min(xmax, width), min(ymax, height)  # check range

                x1_out, y1_out = affine_transform((xmin, ymin), trans_output)
                x2_out, y2_out = affine_transform((xmax, ymax), trans_output)
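                # the box corners are now in feature-map coordinates, so the
                # precomputed backbone features can be pooled with RoIAlign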
                roi_feat = torchvision.ops.roi_align(features,
                                                     [torch.FloatTensor(np.array([[x1_out, y1_out, x2_out, y2_out]]))],
                                                     (16, 16)).squeeze(0).numpy()
        
                feats_list.append(roi_feat)

                # crop per-pixel 3D coordinates and RGB values inside the clipped 2D box
                patch_xyz = xyz[int(ymin):int(ymax), int(xmin):int(xmax), :]
                patch_rgb = rgb[int(ymin):int(ymax), int(xmin):int(xmax), :]

                patch_xyz_list.append(patch_xyz)
                patch_rgb_list.append(patch_rgb)
                type_list.append(obj.cls_type)
                heading_list.append(obj.ry)
                # KITTI stores the box location at the bottom face center;
                # shift up by h/2 to get the geometric box center
                box3d_center_list.append((obj.pos - [0.0, obj.h/2, 0.0]).astype(np.float32))
                box3d_size = np.array([obj.l, obj.w, obj.h]).astype(np.float32)
                box3d_size_list.append(box3d_size)
                frustum_angle_list.append(frustum_angle)


        progress_bar.update()
    progress_bar.close()


    # the lists are pickled sequentially, so they must be read back with
    # pickle.load calls in the same order (see the sketch below)
    with open(output_filename, 'wb') as fp:
        pickle.dump(patch_xyz_list, fp)
        pickle.dump(patch_rgb_list, fp)
        pickle.dump(type_list, fp)
        pickle.dump(heading_list, fp)
        pickle.dump(box3d_center_list, fp)
        pickle.dump(box3d_size_list, fp)
        pickle.dump(frustum_angle_list, fp)

    with open(output_filename.replace('.pickle','_feat.pickle'),'wb') as fp:
        pickle.dump(feats_list, fp)
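

A minimal usage sketch (the split name and file names here are hypothetical,
and the seven pickle.load calls must mirror the dump order above):

    import pickle

    # hypothetical split/file names, shown for illustration only
    extract_patch_data('val', 'patch_data_val.pickle',
                       perturb_box2d=False, augment_times=1)

    # main patch data: seven sequential pickle.dump calls, so seven loads
    with open('patch_data_val.pickle', 'rb') as fp:
        patch_xyz_list = pickle.load(fp)
        patch_rgb_list = pickle.load(fp)
        type_list = pickle.load(fp)
        heading_list = pickle.load(fp)
        box3d_center_list = pickle.load(fp)
        box3d_size_list = pickle.load(fp)
        frustum_angle_list = pickle.load(fp)

    # the RoI features go to a separate '_feat' file holding a single list
    with open('patch_data_val_feat.pickle', 'rb') as fp:
        feats_list = pickle.load(fp)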