PyTorchClassification/data_loader.py [201:316]:
PyTorchClassification/data_loader_cv.py [191:306]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                imgs = [self.center_crop(resized_img)]

            # Add a crop for each bbox, plus its horizontal flip
            for bbox in bboxes:
                bbox_shape = np.array([bbox[2] - bbox[0], bbox[3] - bbox[1]])  # (height, width)
                padding = bbox_shape.max() * 0.1
                # Extend the shorter side by half the side-length difference
                # on each end so that we crop a square patch
                offset = (bbox_shape - np.min(bbox_shape))[::-1] // 2
                # bbox is in (y0, x0, y1, x1) order; PIL's crop expects
                # (left, upper, right, lower)
                bbox_crop = img.crop((bbox[1] - padding - offset[1],
                                      bbox[0] - padding - offset[0],
                                      bbox[3] + padding + offset[1],
                                      bbox[2] + padding + offset[0]))
                bbox_crop = self.resize(bbox_crop)
                imgs.append(bbox_crop)
                imgs.append(self.flip(bbox_crop))

            # Add a flipped center crop plus the multi-crop views
            if multi_crop:
                imgs.append(self.flip(self.center_crop(resized_img)))
                imgs.extend(self.multi_crop(self.resize_for_crop(img)))

            # Convert all crops to normalized tensors
            tensor_imgs = []
            for crop in imgs:
                tensor_img = self.norm_aug(self.tensor_aug(crop))
                tensor_imgs.append(tensor_img)
            return tensor_imgs

# ...class ImageLoader()
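
# Illustrative sketch (not part of the original file): the square-crop
# arithmetic from ImageLoader above as a standalone helper, assuming numpy is
# imported as np like elsewhere in this file. The helper name is hypothetical;
# bboxes are in (y0, x0, y1, x1) order as in the code above.
def example_square_crop_box(bbox):
    bbox_shape = np.array([bbox[2] - bbox[0], bbox[3] - bbox[1]])  # (height, width)
    padding = bbox_shape.max() * 0.1  # 10% padding on every side
    # Per-side extension (y_pad, x_pad): the shorter dimension is widened by
    # half the side-length difference on each end, squaring the patch
    offset = (bbox_shape - np.min(bbox_shape))[::-1] // 2
    return (bbox[1] - padding - offset[1],   # left
            bbox[0] - padding - offset[0],   # upper
            bbox[3] + padding + offset[1],   # right
            bbox[2] + padding + offset[0])   # lower

# Example: the 100x40 box (10, 20, 110, 60) maps to (-20, 0, 100, 120), a
# square 120x120 crop; PIL zero-pads regions that fall outside the image.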


class DistributedBalancedSampler(Sampler):
    """Sampler for distributed training. It draws on average the same number
       of samples from each class even if the dataset itself is unbalanced.
    .. note::
        Dataset is assumed to be of constant size.
    Arguments:
        dataset: Dataset used for sampling.
        num_replicas (optional): Number of processes participating in
            distributed training.
        rank (optional): Rank of the current process within num_replicas.
    """

    def __init__(self, dataset, num_replicas=None, rank=None):
        if num_replicas is None:
            num_replicas = get_world_size()
        if rank is None:
            rank = get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        self.labels = dataset.get_labels()
        self.unique_labels, self.counts = np.unique(self.labels, return_counts=True)
        self.num_classes = len(self.unique_labels)
        self.num_samples_per_class = int(math.ceil(len(dataset) / self.num_classes / self.num_replicas))
        self.num_samples = self.num_samples_per_class * self.num_classes
        self.total_size = self.num_samples * self.num_replicas
        # Note: total_size is approximately len(dataset), so one epoch draws
        # about as many indices as the standard samplers would
        # Precompute the list of image indices per class for speed reasons
        self.class_to_image_idx = {c: [] for c in self.unique_labels}
        for idx, label in enumerate(self.labels):
            self.class_to_image_idx[label].append(idx)
        # Per-class pools of not-yet-drawn images: we draw from each pool
        # without replacement and repopulate it only once all images of its
        # class have been drawn. The pools persist across epochs, so every
        # image of a category is drawn before any image of it repeats.
        self.class_to_avail_images = {c: [] for c in self.unique_labels}


    def get_shuffled_class_images(self, label, generator):
        ''' Returns a shuffled copy of the list of image indices
            of class LABEL '''
        tmp = self.class_to_image_idx[label].copy()
        image_perm = torch.randperm(len(tmp), generator=generator).tolist()
        return [tmp[i] for i in image_perm]


    def __iter__(self):
        # deterministically shuffle based on epoch
        g = torch.Generator()
        g.manual_seed(self.epoch)

        # Each pass visits every class in random order and draws one image
        # from it. We need total_size indices before the rank-based
        # subsampling below, i.e. num_samples_per_class passes per replica.
        indices = []
        for _ in range(self.num_samples_per_class * self.num_replicas):
            class_perm = torch.randperm(self.num_classes, generator=g).tolist()
            for c_id in class_perm:
                cur_class = self.unique_labels[c_id]
                # Repopulate the pool once all of its images have been drawn
                if not self.class_to_avail_images[cur_class]:
                    self.class_to_avail_images[cur_class] = self.get_shuffled_class_images(cur_class, g)
                indices.append(self.class_to_avail_images[cur_class].pop())

        # add extra samples to make it evenly divisible
        indices += indices[:(self.total_size - len(indices))]
        assert len(indices) % self.num_replicas == 0

        # subsample
        indices = indices[self.rank::self.num_replicas]
        return iter(indices)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        self.epoch = epoch

# ...class DistributedBalancedSampler(Sampler)
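
# Illustrative usage sketch (not part of the original file): wiring the
# sampler into a DataLoader, assuming `data` is torch.utils.data as in the
# class definitions here. train_dataset, num_epochs, and batch_size are
# hypothetical placeholders; the dataset must expose get_labels(), as
# JSONDataset below does.
def example_balanced_training_loop(train_dataset, num_epochs, batch_size=64):
    sampler = DistributedBalancedSampler(train_dataset)
    loader = data.DataLoader(train_dataset, batch_size=batch_size,
                             sampler=sampler, num_workers=4)
    for epoch in range(num_epochs):
        # Reseeding per epoch keeps the shuffle deterministic: every replica
        # derives the same index sequence from the epoch number, so the
        # rank-based subsampling in __iter__ stays disjoint across replicas
        sampler.set_epoch(epoch)
        for batch in loader:
            pass  # forward/backward pass goes here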


class JSONDataset(data.Dataset):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


