in preprocess_image.py [0:0]
def preprocess_single_image(img_path, args):
out_dir = os.path.dirname(img_path)
out_rgba = os.path.join(out_dir, os.path.basename(img_path).split('.')[0] + '_rgba.png')
out_depth = os.path.join(out_dir, os.path.basename(img_path).split('.')[0] + '_depth.png')
out_normal = os.path.join(out_dir, os.path.basename(img_path).split('.')[0] + '_normal.png')
out_caption = os.path.join(out_dir, os.path.basename(img_path).split('.')[0] + '_caption.txt')
# load image
print(f'[INFO] loading image {img_path}...')
# check the exisiting files
if os.path.isfile(out_rgba) and os.path.isfile(out_depth) and os.path.isfile(out_normal):
print(f"{img_path} has already been here!")
return
print(img_path)
image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
carved_image = None
if image.shape[-1] == 4:
carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
else:
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if carved_image is None:
# carve background
print(f'[INFO] background removal...')
carved_image = BackgroundRemoval()(image) # [H, W, 4]
mask = carved_image[..., -1] > 0
# predict depth
print(f'[INFO] depth estimation...')
dpt_depth_model = DPT(task='depth')
depth = dpt_depth_model(image)[0]
depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
depth[~mask] = 0
depth = (depth * 255).astype(np.uint8)
del dpt_depth_model
# predict normal
print(f'[INFO] normal estimation...')
dpt_normal_model = DPT(task='normal')
normal = dpt_normal_model(image)[0]
normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
normal[~mask] = 0
del dpt_normal_model
# recenter
if opt.recenter:
print(f'[INFO] recenter...')
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
else:
final_rgba = carved_image
final_depth = depth
final_normal = normal
# write output
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
cv2.imwrite(out_depth, final_depth)
cv2.imwrite(out_normal, final_normal)
if opt.do_caption:
# predict caption (it's too slow... use your brain instead)
print(f'[INFO] captioning...')
blip2 = BLIP2()
caption = blip2(image)
with open(out_caption, 'w') as f:
f.write(caption)