Dassl.pytorch/dassl/data/transforms/transforms.py (239 lines of code) (raw):

import random

import numpy as np
import torch
import torchvision.transforms.functional as F
from torchvision.transforms import (
    Resize, Compose, ToTensor, Normalize, CenterCrop, RandomCrop, ColorJitter,
    RandomApply, GaussianBlur, RandomGrayscale, RandomResizedCrop,
    RandomHorizontalFlip
)
from torchvision.transforms.functional import InterpolationMode

from .autoaugment import SVHNPolicy, CIFAR10Policy, ImageNetPolicy
from .randaugment import RandAugment, RandAugment2, RandAugmentFixMatch

# Names accepted in cfg.INPUT.TRANSFORMS / the ``choices`` argument of
# ``build_transform``.
AVAI_CHOICES = [
    "random_flip",
    "random_resized_crop",
    "normalize",
    "instance_norm",
    "random_crop",
    "random_translation",
    "center_crop",  # This has become a default operation during testing
    "cutout",
    "imagenet_policy",
    "cifar10_policy",
    "svhn_policy",
    "randaugment",
    "randaugment_fixmatch",
    "randaugment2",
    "gaussian_noise",
    "colorjitter",
    "randomgrayscale",
    "gaussian_blur",
]

# Maps the string stored in cfg.INPUT.INTERPOLATION to the torchvision enum.
INTERPOLATION_MODES = {
    "bilinear": InterpolationMode.BILINEAR,
    "bicubic": InterpolationMode.BICUBIC,
    "nearest": InterpolationMode.NEAREST,
}


class Random2DTranslation:
    """Randomly translate an image by enlarging it and cropping it back.

    With probability ``p`` the image is resized to
    (height*1.125, width*1.125) and a random (height, width) window is
    cropped out of it. Otherwise the image is only resized to
    (height, width). Either way the output has the target size.

    Args:
        height (int): target image height.
        width (int): target image width.
        p (float, optional): probability that this operation takes place.
            Default is 0.5.
        interpolation (InterpolationMode, optional): desired interpolation.
            Default is
            ``torchvision.transforms.functional.InterpolationMode.BILINEAR``
    """

    def __init__(
        self,
        height,
        width,
        p=0.5,
        interpolation=InterpolationMode.BILINEAR
    ):
        self.height = height
        self.width = width
        self.p = p
        self.interpolation = interpolation

    def __call__(self, img):
        """Return ``img`` resized (and possibly translated) to the target size."""
        # Skip the translation: plain resize to the target size.
        if random.uniform(0, 1) > self.p:
            return F.resize(
                img=img,
                size=[self.height, self.width],
                interpolation=self.interpolation
            )

        # Enlarge by 12.5% so that a target-sized window can be shifted
        # around inside the enlarged image.
        new_width = int(round(self.width * 1.125))
        new_height = int(round(self.height * 1.125))
        resized_img = F.resize(
            img=img,
            size=[new_height, new_width],
            interpolation=self.interpolation
        )

        # Largest top-left offsets that keep the crop inside the image.
        x_maxrange = new_width - self.width
        y_maxrange = new_height - self.height
        x1 = int(round(random.uniform(0, x_maxrange)))
        y1 = int(round(random.uniform(0, y_maxrange)))

        return F.crop(
            img=resized_img,
            top=y1,
            left=x1,
            height=self.height,
            width=self.width
        )


class InstanceNormalization:
    """Normalize data using per-channel mean and standard deviation.

    The statistics are computed over all H*W positions of each channel of
    the given (C, H, W) tensor.

    Reference:
        - Ulyanov et al. Instance normalization: The missing ingredient
          for fast stylization. ArXiv 2016.
        - Shu et al. A DIRT-T Approach to Unsupervised Domain Adaptation.
          ICLR 2018.

    Args:
        eps (float, optional): value added to the standard deviation to
            avoid division by zero. Default is 1e-8.
    """

    def __init__(self, eps=1e-8):
        self.eps = eps

    def __call__(self, img):
        """Return ``(img - mean) / (std + eps)`` computed channel-wise."""
        C, H, W = img.shape
        # Flatten the spatial dimensions so statistics are per channel.
        img_re = img.reshape(C, H * W)
        mean = img_re.mean(1).view(C, 1, 1)
        std = img_re.std(1).view(C, 1, 1)
        return (img-mean) / (std + self.eps)


class Cutout:
    """Randomly mask out one or more patches from an image.

    https://github.com/uoguelph-mlrg/Cutout

    Each patch is centered on a uniformly sampled pixel and spans
    ``length // 2`` pixels on each side, clipped at the image borders, so a
    patch may be smaller than ``length x length``.

    Args:
        n_holes (int, optional): number of patches to cut out
            of each image. Default is 1.
        length (int, optional): length (in pixels) of each square
            patch. Default is 16.
    """

    def __init__(self, n_holes=1, length=16):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """
        Args:
            img (Tensor): tensor image of size (C, H, W).

        Returns:
            Tensor: image with n_holes of dimension
                length x length cut out of it.
        """
        h = img.size(1)
        w = img.size(2)
        half = self.length // 2

        # 1 keeps a pixel, 0 erases it; shared by all channels.
        mask = np.ones((h, w), np.float32)

        for _ in range(self.n_holes):
            # Sample the patch center (y first, then x).
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - half, 0, h)
            y2 = np.clip(y + half, 0, h)
            x1 = np.clip(x - half, 0, w)
            x2 = np.clip(x + half, 0, w)

            mask[y1:y2, x1:x2] = 0.0

        # Keep the mask on the same device as the image so the product
        # below also works for non-CPU tensors.
        mask = torch.from_numpy(mask).to(img.device)
        mask = mask.expand_as(img)
        return img * mask


class GaussianNoise:
    """Add gaussian noise.

    Args:
        mean (float, optional): mean of the noise. Default is 0.
        std (float, optional): standard deviation of the noise.
            Default is 0.15.
        p (float, optional): probability that noise is added.
            Default is 0.5.
    """

    def __init__(self, mean=0, std=0.15, p=0.5):
        self.mean = mean
        self.std = std
        self.p = p

    def __call__(self, img):
        """Return ``img`` unchanged or with noise added (probability ``p``)."""
        if random.uniform(0, 1) > self.p:
            return img
        # Allocate the noise on the image's device; the default dtype is
        # kept so integer/half inputs behave as before.
        noise = torch.randn(img.size(), device=img.device)
        noise = noise * self.std + self.mean
        return img + noise


def build_transform(cfg, is_train=True, choices=None):
    """Build transformation function.

    Args:
        cfg (CfgNode): config.
        is_train (bool, optional): for training (True) or test (False).
            Default is True.
        choices (list, optional): list of strings which will overwrite
            cfg.INPUT.TRANSFORMS if given. Default is None.

    Returns:
        The composed transform, or None if cfg.INPUT.NO_TRANSFORM is set.

    Raises:
        AssertionError: if a choice is not listed in ``AVAI_CHOICES``.
    """
    if cfg.INPUT.NO_TRANSFORM:
        print("Note: no transform is applied!")
        return None

    if choices is None:
        choices = cfg.INPUT.TRANSFORMS

    for choice in choices:
        assert choice in AVAI_CHOICES, (
            f"Invalid transform choice '{choice}'. "
            f"Available choices: {AVAI_CHOICES}"
        )

    target_size = f"{cfg.INPUT.SIZE[0]}x{cfg.INPUT.SIZE[1]}"

    normalize = Normalize(mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD)

    if is_train:
        return _build_transform_train(cfg, choices, target_size, normalize)
    else:
        return _build_transform_test(cfg, choices, target_size, normalize)


def _build_transform_train(cfg, choices, target_size, normalize):
    """Assemble the training pipeline from the selected ``choices``.

    Transforms are appended in a fixed order: resizing/cropping, flipping,
    augmentation policies and color operations, ``ToTensor``, and finally
    the tensor-level operations (cutout, normalization, noise, instance
    normalization).

    Args:
        cfg (CfgNode): config; the cfg.INPUT.* options are read.
        choices (list): names of the transforms to enable.
        target_size (str): "HxW" string, used for logging only.
        normalize (Normalize): normalization added for "normalize".

    Returns:
        Compose: the composed training transform.
    """
    print("Building transform_train")
    tfm_train = []

    interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION]
    input_size = cfg.INPUT.SIZE

    # Make sure the image size matches the target size
    if (
        "random_crop" not in choices
        and "random_resized_crop" not in choices
    ):
        print(f"+ resize to {target_size}")
        tfm_train += [Resize(input_size, interpolation=interp_mode)]

    if "random_translation" in choices:
        print("+ random translation")
        # Use the configured interpolation, like the other resize-based
        # transforms in this pipeline.
        tfm_train += [
            Random2DTranslation(
                input_size[0], input_size[1], interpolation=interp_mode
            )
        ]

    if "random_crop" in choices:
        crop_padding = cfg.INPUT.CROP_PADDING
        print(f"+ random crop (padding = {crop_padding})")
        tfm_train += [RandomCrop(input_size, padding=crop_padding)]

    if "random_resized_crop" in choices:
        s_ = cfg.INPUT.RRCROP_SCALE
        print(f"+ random resized crop (size={input_size}, scale={s_})")
        tfm_train += [
            RandomResizedCrop(input_size, scale=s_, interpolation=interp_mode)
        ]

    if "random_flip" in choices:
        print("+ random flip")
        tfm_train += [RandomHorizontalFlip()]

    if "imagenet_policy" in choices:
        print("+ imagenet policy")
        tfm_train += [ImageNetPolicy()]

    if "cifar10_policy" in choices:
        print("+ cifar10 policy")
        tfm_train += [CIFAR10Policy()]

    if "svhn_policy" in choices:
        print("+ svhn policy")
        tfm_train += [SVHNPolicy()]

    if "randaugment" in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        m_ = cfg.INPUT.RANDAUGMENT_M
        print(f"+ randaugment (n={n_}, m={m_})")
        tfm_train += [RandAugment(n_, m_)]

    if "randaugment_fixmatch" in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print(f"+ randaugment_fixmatch (n={n_})")
        tfm_train += [RandAugmentFixMatch(n_)]

    if "randaugment2" in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print(f"+ randaugment2 (n={n_})")
        tfm_train += [RandAugment2(n_)]

    if "colorjitter" in choices:
        b_ = cfg.INPUT.COLORJITTER_B
        c_ = cfg.INPUT.COLORJITTER_C
        s_ = cfg.INPUT.COLORJITTER_S
        h_ = cfg.INPUT.COLORJITTER_H
        print(
            f"+ color jitter (brightness={b_}, "
            f"contrast={c_}, saturation={s_}, hue={h_})"
        )
        tfm_train += [
            ColorJitter(
                brightness=b_,
                contrast=c_,
                saturation=s_,
                hue=h_,
            )
        ]

    if "randomgrayscale" in choices:
        print("+ random gray scale")
        tfm_train += [RandomGrayscale(p=cfg.INPUT.RGS_P)]

    if "gaussian_blur" in choices:
        print(f"+ gaussian blur (kernel={cfg.INPUT.GB_K})")
        gb_k, gb_p = cfg.INPUT.GB_K, cfg.INPUT.GB_P
        tfm_train += [RandomApply([GaussianBlur(gb_k)], p=gb_p)]

    # Everything appended below this point operates on tensors.
    print("+ to torch tensor of range [0, 1]")
    tfm_train += [ToTensor()]

    if "cutout" in choices:
        cutout_n = cfg.INPUT.CUTOUT_N
        cutout_len = cfg.INPUT.CUTOUT_LEN
        print(f"+ cutout (n_holes={cutout_n}, length={cutout_len})")
        tfm_train += [Cutout(cutout_n, cutout_len)]

    if "normalize" in choices:
        print(
            f"+ normalization (mean={cfg.INPUT.PIXEL_MEAN}, std={cfg.INPUT.PIXEL_STD})"
        )
        tfm_train += [normalize]

    if "gaussian_noise" in choices:
        print(
            f"+ gaussian noise (mean={cfg.INPUT.GN_MEAN}, std={cfg.INPUT.GN_STD})"
        )
        tfm_train += [GaussianNoise(cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD)]

    if "instance_norm" in choices:
        print("+ instance normalization")
        tfm_train += [InstanceNormalization()]

    tfm_train = Compose(tfm_train)

    return tfm_train


def _build_transform_test(cfg, choices, target_size, normalize):
    """Assemble the test pipeline.

    The pipeline always resizes the smaller edge to ``max(cfg.INPUT.SIZE)``,
    center-crops to ``cfg.INPUT.SIZE`` and converts to a tensor. Of the
    ``choices``, only "normalize" and "instance_norm" are honored here.

    Args:
        cfg (CfgNode): config; the cfg.INPUT.* options are read.
        choices (list): names of the transforms to enable.
        target_size (str): "HxW" string, used for logging only.
        normalize (Normalize): normalization added for "normalize".

    Returns:
        Compose: the composed test transform.
    """
    print("Building transform_test")
    tfm_test = []

    interp_mode = INTERPOLATION_MODES[cfg.INPUT.INTERPOLATION]
    input_size = cfg.INPUT.SIZE

    print(f"+ resize the smaller edge to {max(input_size)}")
    tfm_test += [Resize(max(input_size), interpolation=interp_mode)]

    print(f"+ {target_size} center crop")
    tfm_test += [CenterCrop(input_size)]

    print("+ to torch tensor of range [0, 1]")
    tfm_test += [ToTensor()]

    if "normalize" in choices:
        print(
            f"+ normalization (mean={cfg.INPUT.PIXEL_MEAN}, std={cfg.INPUT.PIXEL_STD})"
        )
        tfm_test += [normalize]

    if "instance_norm" in choices:
        print("+ instance normalization")
        tfm_test += [InstanceNormalization()]

    tfm_test = Compose(tfm_test)

    return tfm_test