timm/data/naflex

"""Variable‑size Mixup / CutMix utilities for NaFlex data loaders. This module provides: * `mix_batch_variable_size` – pixel‑level Mixup/CutMix that operates on a list of images whose spatial sizes differ, mixing only their central overlap so no resizing is required. * `pairwise_mixup_target` – builds soft‑label targets that exactly match the per‑sample pixel provenance produced by the mixer. * `NaFlexMixup` – a callable functor that wraps the two helpers and stores all augmentation hyper‑parameters in one place, making it easy to plug into different dataset wrappers. Hacked together by / Copyright 2025, Ross Wightman, Hugging Face """ import math import random from typing import Dict, List, Tuple, Union import torch def mix_batch_variable_size( imgs: List[torch.Tensor], *, mixup_alpha: float = 0.8, cutmix_alpha: float = 1.0, switch_prob: float = 0.5, local_shuffle: int = 4, ) -> Tuple[List[torch.Tensor], List[float], Dict[int, int]]: """Apply Mixup or CutMix on a batch of variable-sized images. Sorts images by aspect ratio and pairs neighboring samples. Only the mutual central overlap region of each pair is mixed. Args: imgs: List of transformed images shaped (C, H, W). mixup_alpha: Beta distribution alpha for Mixup. Set to 0 to disable. cutmix_alpha: Beta distribution alpha for CutMix. Set to 0 to disable. switch_prob: Probability of using CutMix when both modes are enabled. local_shuffle: Size of local windows for shuffling after aspect sorting. Returns: Tuple of (mixed_imgs, lam_list, pair_to) where: - mixed_imgs: List of mixed images - lam_list: Per-sample lambda values representing mixing degree - pair_to: Mapping i -> j of which sample was mixed with which """ if len(imgs) < 2: raise ValueError("Need at least two images to perform Mixup/CutMix.") # Decide augmentation mode and raw λ if mixup_alpha > 0.0 and cutmix_alpha > 0.0: use_cutmix = torch.rand(()).item() < switch_prob alpha = cutmix_alpha if use_cutmix else mixup_alpha elif mixup_alpha > 0.0: use_cutmix = False alpha = mixup_alpha elif cutmix_alpha > 0.0: use_cutmix = True alpha = cutmix_alpha else: raise ValueError("Both mixup_alpha and cutmix_alpha are zero – nothing to do.") lam_raw = torch.distributions.Beta(alpha, alpha).sample().item() lam_raw = max(0.0, min(1.0, lam_raw)) # numerical safety # Pair images by nearest aspect ratio order = sorted(range(len(imgs)), key=lambda i: imgs[i].shape[2] / imgs[i].shape[1]) if local_shuffle > 1: for start in range(0, len(order), local_shuffle): random.shuffle(order[start:start + local_shuffle]) pair_to: Dict[int, int] = {} for a, b in zip(order[::2], order[1::2]): pair_to[a] = b pair_to[b] = a odd_one = order[-1] if len(imgs) % 2 else None mixed_imgs: List[torch.Tensor] = [None] * len(imgs) lam_list: List[float] = [1.0] * len(imgs) for i in range(len(imgs)): if i == odd_one: mixed_imgs[i] = imgs[i] continue j = pair_to[i] xi, xj = imgs[i], imgs[j] _, hi, wi = xi.shape _, hj, wj = xj.shape dest_area = hi * wi # Central overlap common to both images oh, ow = min(hi, hj), min(wi, wj) overlap_area = oh * ow top_i, left_i = (hi - oh) // 2, (wi - ow) // 2 top_j, left_j = (hj - oh) // 2, (wj - ow) // 2 xi = xi.clone() if use_cutmix: # CutMix: random rectangle inside the overlap cut_ratio = math.sqrt(1.0 - lam_raw) ch, cw = int(oh * cut_ratio), int(ow * cut_ratio) cut_area = ch * cw y_off = random.randint(0, oh - ch) x_off = random.randint(0, ow - cw) yl_i, xl_i = top_i + y_off, left_i + x_off yl_j, xl_j = top_j + y_off, left_j + x_off xi[:, yl_i: yl_i + ch, xl_i: xl_i + cw] = xj[:, yl_j: yl_j + ch, xl_j: xl_j + cw] mixed_imgs[i] = xi corrected_lam = 1.0 - cut_area / float(dest_area) lam_list[i] = corrected_lam else: # Mixup: blend the entire overlap region patch_i = xi[:, top_i:top_i + oh, left_i:left_i + ow] patch_j = xj[:, top_j:top_j + oh, left_j:left_j + ow] blended = patch_i.mul(lam_raw).add_(patch_j, alpha=1.0 - lam_raw) xi[:, top_i:top_i + oh, left_i:left_i + ow] = blended mixed_imgs[i] = xi corrected_lam = (dest_area - overlap_area) / dest_area + lam_raw * overlap_area / dest_area lam_list[i] = corrected_lam return mixed_imgs, lam_list, pair_to def smoothed_sparse_target( targets: torch.Tensor, *, num_classes: int, smoothing: float = 0.0, ) -> torch.Tensor: off_val = smoothing / num_classes on_val = 1.0 - smoothing + off_val y_onehot = torch.full( (targets.size(0), num_classes), off_val, dtype=torch.float32, device=targets.device ) y_onehot.scatter_(1, targets.unsqueeze(1), on_val) return y_onehot def pairwise_mixup_target( targets: torch.Tensor, pair_to: Dict[int, int], lam_list: List[float], *, num_classes: int, smoothing: float = 0.0, ) -> torch.Tensor: """Create soft targets that match the pixel‑level mixing performed. Args: targets: (B,) tensor of integer class indices. pair_to: Mapping of sample index to its mixed partner as returned by mix_batch_variable_size(). lam_list: Per‑sample fractions of own pixels, also from the mixer. num_classes: Total number of classes in the dataset. smoothing: Label‑smoothing value in the range [0, 1). Returns: Tensor of shape (B, num_classes) whose rows sum to 1. """ y_onehot = smoothed_sparse_target(targets, num_classes=num_classes, smoothing=smoothing) targets = y_onehot.clone() for i, j in pair_to.items(): lam = lam_list[i] targets[i].mul_(lam).add_(y_onehot[j], alpha=1.0 - lam) return targets class NaFlexMixup: """Callable wrapper that combines mixing and target generation.""" def __init__( self, *, num_classes: int, mixup_alpha: float = 0.8, cutmix_alpha: float = 1.0, switch_prob: float = 0.5, prob: float = 1.0, local_shuffle: int = 4, label_smoothing: float = 0.0, ) -> None: """Configure the augmentation. Args: num_classes: Total number of classes. mixup_alpha: Beta α for Mixup. 0 disables Mixup. cutmix_alpha: Beta α for CutMix. 0 disables CutMix. switch_prob: Probability of selecting CutMix when both modes are enabled. prob: Probability of applying any mixing per batch. local_shuffle: Window size used to shuffle images after aspect sorting so pairings vary between epochs. smoothing: Label‑smoothing value. 0 disables smoothing. """ self.num_classes = num_classes self.mixup_alpha = mixup_alpha self.cutmix_alpha = cutmix_alpha self.switch_prob = switch_prob self.prob = prob self.local_shuffle = local_shuffle self.smoothing = label_smoothing def __call__( self, imgs: List[torch.Tensor], targets: torch.Tensor, ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: """Apply the augmentation and generate matching targets. Args: imgs: List of already transformed images shaped (C, H, W). targets: Hard labels with shape (B,). Returns: mixed_imgs: List of mixed images in the same order and shapes as the input. targets: Soft‑label tensor shaped (B, num_classes) suitable for cross‑entropy with soft targets. """ if not isinstance(targets, torch.Tensor): targets = torch.tensor(targets) if random.random() > self.prob: targets = smoothed_sparse_target(targets, num_classes=self.num_classes, smoothing=self.smoothing) return imgs, targets.unbind(0) mixed_imgs, lam_list, pair_to = mix_batch_variable_size( imgs, mixup_alpha=self.mixup_alpha, cutmix_alpha=self.cutmix_alpha, switch_prob=self.switch_prob, local_shuffle=self.local_shuffle, ) targets = pairwise_mixup_target( targets, pair_to, lam_list, num_classes=self.num_classes, smoothing=self.smoothing, ) return mixed_imgs, targets.unbind(0)

timm/data/naflex_mixup.py (147 lines of code) (raw):