# preprocess_single_image() in preprocess_image.py [0:0]


def preprocess_single_image(img_path, args):
    """Produce RGBA, depth and normal maps (and optionally a caption) for one image.

    Outputs are written next to ``img_path`` as ``<stem>_rgba.png``,
    ``<stem>_depth.png``, ``<stem>_normal.png`` and, when captioning is
    requested, ``<stem>_caption.txt``. ``<stem>`` is the file name up to its
    first dot. If the three image outputs already exist the function returns
    without doing any work.

    Args:
        img_path: Path of the input image.
        args: Options object. The attributes read are ``recenter``, ``size``,
            ``border_ratio`` and ``do_caption``.

    Raises:
        OSError: If OpenCV cannot read ``img_path``.
        ValueError: If the foreground mask (alpha > 0) is empty.
    """
    out_dir = os.path.dirname(img_path)
    # Naming is kept as "everything before the first dot" so that previously
    # generated outputs are still recognised by the skip check below.
    stem = os.path.basename(img_path).split('.')[0]
    out_rgba = os.path.join(out_dir, stem + '_rgba.png')
    out_depth = os.path.join(out_dir, stem + '_depth.png')
    out_normal = os.path.join(out_dir, stem + '_normal.png')
    out_caption = os.path.join(out_dir, stem + '_caption.txt')

    # load image
    print(f'[INFO] loading image {img_path}...')

    # skip work when the image outputs are already present
    if all(os.path.isfile(p) for p in (out_rgba, out_depth, out_normal)):
        print(f"{img_path} has already been here!")
        return
    print(img_path)
    image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f'cannot read image: {img_path}')

    carved_image = None
    if image.ndim == 2:
        # single-channel input: expand so the models receive an RGB image
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[-1] == 4:
        # the input already carries an alpha channel; use it as the carving
        carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if carved_image is None:
        # carve background
        print('[INFO] background removal...')
        carved_image = BackgroundRemoval()(image)  # [H, W, 4]
    mask = carved_image[..., -1] > 0
    if not mask.any():
        # Fail early with a clear message; the min/max reductions below
        # would otherwise raise on an empty selection.
        raise ValueError(f'no foreground pixels found in {img_path}')

    # predict depth
    print('[INFO] depth estimation...')
    dpt_depth_model = DPT(task='depth')
    depth = dpt_depth_model(image)[0]
    # normalise the foreground depth to [0, 1]; the background is set to 0
    fg_depth = depth[mask]
    depth[mask] = (fg_depth - fg_depth.min()) / (fg_depth.max() - fg_depth.min() + 1e-9)
    depth[~mask] = 0
    depth = (depth * 255).astype(np.uint8)
    del dpt_depth_model

    # predict normal
    print('[INFO] normal estimation...')
    dpt_normal_model = DPT(task='normal')
    normal = dpt_normal_model(image)[0]
    normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
    normal[~mask] = 0
    del dpt_normal_model

    # recenter
    if args.recenter:
        print('[INFO] recenter...')
        size = args.size
        final_rgba = np.zeros((size, size, 4), dtype=np.uint8)
        final_depth = np.zeros((size, size), dtype=np.uint8)
        final_normal = np.zeros((size, size, 3), dtype=np.uint8)

        rows, cols = np.nonzero(mask)
        # +1 turns the inclusive max index into an exclusive slice end, so
        # the last foreground row/column is kept and the box is never empty.
        row_min, row_max = rows.min(), rows.max() + 1
        col_min, col_max = cols.min(), cols.max() + 1
        box_h = row_max - row_min
        box_w = col_max - col_min
        desired_size = int(size * (1 - args.border_ratio))
        scale = desired_size / max(box_h, box_w)
        # clamp to 1 px: cv2.resize rejects a zero-sized destination
        new_h = max(1, int(box_h * scale))
        new_w = max(1, int(box_w * scale))
        top = (size - new_h) // 2
        bottom = top + new_h
        left = (size - new_w) // 2
        right = left + new_w

        def _fit(src):
            # crop the foreground box and scale it to the target footprint
            crop = src[row_min:row_max, col_min:col_max]
            return cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_AREA)

        final_rgba[top:bottom, left:right] = _fit(carved_image)
        final_depth[top:bottom, left:right] = _fit(depth)
        final_normal[top:bottom, left:right] = _fit(normal)
    else:
        final_rgba = carved_image
        final_depth = depth
        final_normal = normal

    # write output
    cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
    cv2.imwrite(out_depth, final_depth)
    cv2.imwrite(out_normal, final_normal)

    if args.do_caption:
        # predict caption (it's too slow... use your brain instead)
        print('[INFO] captioning...')
        blip2 = BLIP2()
        caption = blip2(image)
        with open(out_caption, 'w', encoding='utf-8') as f:
            f.write(caption)