torchbenchmark/models/Background_Matting/data_loader.py (229 lines of code) (raw):

from __future__ import print_function, division import os import torch import pandas as pd import skimage from skimage import io import numpy as np import matplotlib.pyplot as plt import pdb, random from torch.utils.data import Dataset, DataLoader import random, os, cv2 unknown_code=128 class VideoData(Dataset): def __init__(self,csv_file,data_config,transform=None): self.frames = pd.read_csv(csv_file,sep=';') self.transform = transform self.resolution=data_config['reso'] def __len__(self): return len(self.frames) def __getitem__(self,idx): img = io.imread(self.frames.iloc[idx, 0]) back = io.imread(self.frames.iloc[idx, 1]) seg = io.imread(self.frames.iloc[idx, 2]) fr1 = cv2.cvtColor(io.imread(self.frames.iloc[idx, 3]), cv2.COLOR_BGR2GRAY) fr2 = cv2.cvtColor(io.imread(self.frames.iloc[idx, 4]), cv2.COLOR_BGR2GRAY) fr3 = cv2.cvtColor(io.imread(self.frames.iloc[idx, 5]), cv2.COLOR_BGR2GRAY) fr4 = cv2.cvtColor(io.imread(self.frames.iloc[idx, 6]), cv2.COLOR_BGR2GRAY) back_rnd = io.imread(self.frames.iloc[idx, 7]) sz=self.resolution if np.random.random_sample() > 0.5: img = cv2.flip(img,1) seg = cv2.flip(seg,1) back = cv2.flip(back,1) back_rnd = cv2.flip(back_rnd,1) fr1=cv2.flip(fr1,1); fr2=cv2.flip(fr2,1); fr3=cv2.flip(fr3,1); fr4=cv2.flip(fr4,1) #make frames together multi_fr=np.zeros((img.shape[0],img.shape[1],4)) multi_fr[...,0]=fr1; multi_fr[...,1]=fr2; multi_fr[...,2]=fr3; multi_fr[...,3]=fr4; #allow random cropping centered on the segmentation map bbox=create_bbox(seg,seg.shape[0],seg.shape[1]) img=apply_crop(img,bbox,self.resolution) seg=apply_crop(seg,bbox,self.resolution) back=apply_crop(back,bbox,self.resolution) back_rnd=apply_crop(back_rnd,bbox,self.resolution) multi_fr=apply_crop(multi_fr,bbox,self.resolution) #convert seg to guidance map #segg=create_seg_guide(seg,self.resolution) sample = {'image': to_tensor(img), 'seg': to_tensor(create_seg_guide(seg,self.resolution)), 'bg': to_tensor(back), 'multi_fr': to_tensor(multi_fr), 'seg-gt':to_tensor(seg), 'back-rnd': to_tensor(back_rnd)} if self.transform: sample = self.transform(sample) return sample class AdobeDataAffineHR(Dataset): def __init__(self,csv_file,data_config,transform=None): self.frames = pd.read_csv(csv_file,sep=';') self.transform = transform self.resolution=data_config['reso'] self.trimapK=data_config['trimapK'] self.noise=data_config['noise'] def __len__(self): return len(self.frames) def __getitem__(self,idx): try: #load fg = io.imread(self.frames.iloc[idx, 0]) alpha = io.imread(self.frames.iloc[idx, 1]) image = io.imread(self.frames.iloc[idx, 2]) back = io.imread(self.frames.iloc[idx, 3]) fg = cv2.resize(fg, dsize=(800,800)) alpha = cv2.resize(alpha, dsize=(800,800)) back = cv2.resize(back, dsize=(800,800)) image = cv2.resize(image, dsize=(800,800)) sz=self.resolution #random flip if np.random.random_sample() > 0.5: alpha = cv2.flip(alpha,1) fg = cv2.flip(fg,1) back = cv2.flip(back,1) image = cv2.flip(image,1) trimap=generate_trimap(alpha,self.trimapK[0],self.trimapK[1],False) #randcom crop+scale different_sizes = [(576,576),(608,608),(640,640),(672,672),(704,704),(736,736),(768,768),(800,800)] crop_size = random.choice(different_sizes) x, y = random_choice(trimap, crop_size) fg = safe_crop(fg, x, y, crop_size,sz) alpha = safe_crop(alpha, x, y, crop_size,sz) image = safe_crop(image, x, y, crop_size,sz) back = safe_crop(back, x, y, crop_size,sz) trimap = safe_crop(trimap, x, y, crop_size,sz) #Perturb Background: random noise addition or gamma change if self.noise: if np.random.random_sample() > 0.6: sigma=np.random.randint(low=2, high=6) mu=np.random.randint(low=0, high=14)-7 back_tr=add_noise(back,mu,sigma) else: back_tr=skimage.exposure.adjust_gamma(back,np.random.normal(1,0.12)) #Create motion cues: transform foreground and create 4 additional images affine_fr=np.zeros((fg.shape[0],fg.shape[1],4)) for t in range(0,4): T=np.random.normal(0,5,(2,1)); theta=np.random.normal(0,7); R=np.array([[np.cos(np.deg2rad(theta)), -np.sin(np.deg2rad(theta))],[np.sin(np.deg2rad(theta)), np.cos(np.deg2rad(theta))]]) sc=np.array([[1+np.random.normal(0,0.05), 0],[0,1]]); sh=np.array([[1, np.random.normal(0,0.05)*(np.random.random_sample() > 0.5)],[np.random.normal(0,0.05)*(np.random.random_sample() > 0.5), 1]]); A=np.concatenate((sc*sh*R, T), axis=1); fg_tr = cv2.warpAffine(fg.astype(np.uint8),A,(fg.shape[1],fg.shape[0]),flags=cv2.INTER_LINEAR,borderMode=cv2.BORDER_REFLECT) alpha_tr = cv2.warpAffine(alpha.astype(np.uint8),A,(fg.shape[1],fg.shape[0]),flags=cv2.INTER_NEAREST,borderMode=cv2.BORDER_REFLECT) sigma=np.random.randint(low=2, high=6) mu=np.random.randint(low=0, high=14)-7 back_tr0=add_noise(back,mu,sigma) affine_fr[...,t]=cv2.cvtColor(composite(fg_tr,back_tr0,alpha_tr), cv2.COLOR_BGR2GRAY) sample = {'image': to_tensor(image), 'fg': to_tensor(fg), 'alpha': to_tensor(alpha), 'bg': to_tensor(back), 'trimap': to_tensor(trimap), 'bg_tr': to_tensor(back_tr), 'seg': to_tensor(create_seg(alpha,trimap)), 'multi_fr': to_tensor(affine_fr)} if self.transform: sample = self.transform(sample) return sample except Exception as e: print("Error loading: " + self.frames.iloc[idx, 0]) print(e) #Functions def create_seg_guide(rcnn,reso): kernel_er = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) kernel_dil = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) rcnn=rcnn.astype(np.float32)/255; rcnn[rcnn>0.2]=1; K=25 zero_id=np.nonzero(np.sum(rcnn,axis=1)==0) del_id=zero_id[0][zero_id[0]>250] if len(del_id)>0: del_id=[del_id[0]-2,del_id[0]-1,*del_id] rcnn=np.delete(rcnn,del_id,0) rcnn = cv2.copyMakeBorder( rcnn, 0, K + len(del_id), 0, 0, cv2.BORDER_REPLICATE) rcnn = cv2.erode(rcnn, kernel_er, iterations=np.random.randint(10,20)) rcnn = cv2.dilate(rcnn, kernel_dil, iterations=np.random.randint(3,7)) k_size_list=[(21,21),(31,31),(41,41)] rcnn=cv2.GaussianBlur(rcnn.astype(np.float32),random.choice(k_size_list),0) rcnn=(255*rcnn).astype(np.uint8) rcnn=np.delete(rcnn, range(reso[0],reso[0]+K), 0) return rcnn def crop_holes(img,cx,cy,crop_size): img[cy:cy+crop_size[0],cx:cx+crop_size[1]]=0 return img def create_seg(alpha,trimap): #old num_holes=np.random.randint(low=0, high=3) crop_size_list=[(15,15),(25,25),(35,35),(45,45)] kernel_er = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) kernel_dil = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) seg = (alpha>0.5).astype(np.float32) #print('Before %.4f max: %.4f' %(seg.sum(),seg.max())) #old seg = cv2.erode(seg, kernel_er, iterations=np.random.randint(low=10,high=20)) seg = cv2.dilate(seg, kernel_dil, iterations=np.random.randint(low=15,high=30)) #print('After %.4f max: %.4f' %(seg.sum(),seg.max())) seg=seg.astype(np.float32) seg=(255*seg).astype(np.uint8) for i in range(num_holes): crop_size=random.choice(crop_size_list) cx,cy = random_choice(trimap,crop_size) seg=crop_holes(seg,cx,cy,crop_size) trimap=crop_holes(trimap,cx,cy,crop_size) k_size_list=[(21,21),(31,31),(41,41)] seg=cv2.GaussianBlur(seg.astype(np.float32),random.choice(k_size_list),0) return seg.astype(np.uint8) def apply_crop(img,bbox,reso): img_crop=img[bbox[0]:bbox[0]+bbox[2],bbox[1]:bbox[1]+bbox[3],...]; img_crop=cv2.resize(img_crop,reso) return img_crop def create_bbox(mask,R,C): where = np.array(np.where(mask)) x1, y1 = np.amin(where, axis=1) x2, y2 = np.amax(where, axis=1) w=np.maximum(y2-y1,x2-x1); bd=np.random.uniform(0.1,0.4) x1=x1-np.round(bd*w) y1=y1-np.round(bd*w) y2=y2+np.round(bd*w) if x1<0: x1=0 if y1<0: y1=0 if y2>=C: y2=C if x2>=R: x2=R-1 bbox=np.around([x1,y1,x2-x1,y2-y1]).astype('int') return bbox def composite(fg, bg, a): fg = fg.astype(np.float32); bg=bg.astype(np.float32); a=a.astype(np.float32); alpha= np.expand_dims(a / 255,axis=2) im = alpha * fg + (1 - alpha) * bg im = im.astype(np.uint8) return im def add_noise(back,mean,sigma): back=back.astype(np.float32) row,col,ch= back.shape gauss = np.random.normal(mean,sigma,(row,col,ch)) gauss = gauss.reshape(row,col,ch) #gauss = np.repeat(gauss[:, :, np.newaxis], ch, axis=2) noisy = back + gauss noisy[noisy<0]=0; noisy[noisy>255]=255; return noisy.astype(np.uint8) def safe_crop(mat, x, y, crop_size,img_size,cubic=True): img_rows, img_cols = img_size crop_height, crop_width = crop_size if len(mat.shape) == 2: ret = np.zeros((crop_height, crop_width), np.float32) else: ret = np.zeros((crop_height, crop_width, 3), np.float32) crop = mat[y:y + crop_height, x:x + crop_width] h, w = crop.shape[:2] ret[0:h, 0:w] = crop if crop_size != (img_rows, img_cols): if cubic: ret = cv2.resize(ret, dsize=(img_rows, img_cols)) else: ret = cv2.resize(ret, dsize=(img_rows, img_cols), interpolation=cv2.INTER_NEAREST) return ret def generate_trimap(alpha,K1,K2,train_mode): kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) fg = np.array(np.equal(alpha, 255).astype(np.float32)) if train_mode: K=np.random.randint(K1,K2) else: K=np.round((K1+K2)/2).astype('int') fg = cv2.erode(fg, kernel, iterations=K) unknown = np.array(np.not_equal(alpha, 0).astype(np.float32)) unknown = cv2.dilate(unknown, kernel, iterations=2*K) trimap = fg * 255 + (unknown - fg) * 128 return trimap.astype(np.uint8) def random_choice(trimap, crop_size=(320, 320)): img_height, img_width = trimap.shape[0:2] crop_height, crop_width = crop_size val_idx=np.zeros((img_height,img_width)) val_idx[int(crop_height/2):int(img_height-crop_height/2),int(crop_width/2):int(img_width-crop_width/2)]=1 y_indices, x_indices = np.where(np.logical_and(trimap == unknown_code,val_idx==1)) num_unknowns = len(y_indices) x, y = 0, 0 if num_unknowns > 0: ix = np.random.choice(range(num_unknowns)) center_x = x_indices[ix] center_y = y_indices[ix] x = max(0, center_x - int(crop_width / 2)) y = max(0, center_y - int(crop_height / 2)) #added extra return x, y def to_tensor(pic): if len(pic.shape)>=3: img = torch.from_numpy(pic.transpose((2, 0, 1))) else: img=torch.from_numpy(pic) img=img.unsqueeze(0) # backward compatibility return 2*(img.float().div(255))-1