in utils/datasets.py [0:0]
def __getitem__(self, index):
    """Load one sample: image, box labels, and the down-sampled mask/weight maps.

    The sample is built either as a mosaic (optionally blended with a second
    mosaic via MixUp) or as a single letterboxed image, then augmented when
    ``self.augment`` is set.

    Args:
        index: Position into ``self.indices``; the value stored there selects
            the actual image.

    Returns:
        A 6-tuple ``(img, labels_out, mask, weight, path, shapes)``:

        * ``img`` -- tensor built from the channel-reversed (BGR to RGB),
          channels-first image array.
        * ``labels_out`` -- ``(nL, 6)`` float tensor. Column 0 is left at zero
          here (presumably filled in by the collate function -- confirm);
          columns 1-5 hold the label rows with the box converted to
          normalized xywh.
        * ``mask``, ``weight`` -- the two halves (split along the channel
          axis) of the 8x8 max-pooled, channels-first ``msk`` array, as
          tensors; both are ``None`` when no mask array is available.
        * ``path`` -- ``self.img_files[index]``.
        * ``shapes`` -- ``None`` for mosaics, otherwise
          ``((h0, w0), ((h / h0, w / w0), pad))`` for COCO mAP rescaling.
    """
    index = self.indices[index]  # linear, shuffled, or image_weights

    hyp = self.hyp
    mosaic = self.mosaic and random.random() <= hyp['mosaic']
    if mosaic:
        # Load mosaic
        img, msk, labels = load_mosaic(self, index)
        shapes = None

        # MixUp https://arxiv.org/pdf/1710.09412.pdf
        if random.random() < hyp['mixup']:
            img2, msk2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
            r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
            img = (img * r + img2 * (1 - r)).astype(np.uint8)
            # Masks are merged by element-wise maximum (written in place into
            # msk) rather than blended with the mixup ratio.
            # NOTE(review): assumes load_mosaic never returns a None mask -- confirm.
            np.maximum(msk, msk2, out=msk)
            labels = np.concatenate((labels, labels2), 0)
    else:
        # Load image
        (img, msk), (h0, w0), (h, w) = load_image(self, index)

        # Letterbox
        shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
        img, msk, ratio, pad = letterbox(img, shape, msk=msk, auto=False, scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Copy so the cached labels in self.labels are never modified in place.
        labels = self.labels[index].copy()
        if labels.size:  # normalized xywh to pixel xyxy format
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

    if self.augment:
        # Augment imagespace (only for non-mosaic samples in this method)
        if not mosaic:
            img, msk, labels = random_perspective(img, msk, labels,
                                                  degrees=hyp['degrees'],
                                                  translate=hyp['translate'],
                                                  scale=hyp['scale'],
                                                  shear=hyp['shear'],
                                                  perspective=hyp['perspective'])

        # Augment colorspace (return value unused; presumably modifies img in place -- confirm)
        augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

    nL = len(labels)  # number of labels
    if nL:
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
        labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
        labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

    if self.augment:
        # flip up-down: mirror the normalized y-centre along with the pixels
        if random.random() < hyp['flipud']:
            img = np.flipud(img)
            msk = np.flipud(msk) if msk is not None else None
            if nL:
                labels[:, 2] = 1 - labels[:, 2]

        # flip left-right: mirror the normalized x-centre along with the pixels
        if random.random() < hyp['fliplr']:
            img = np.fliplr(img)
            msk = np.fliplr(msk) if msk is not None else None
            if nL:
                labels[:, 1] = 1 - labels[:, 1]

    labels_out = torch.zeros((nL, 6))
    if nL:
        labels_out[:, 1:] = torch.from_numpy(labels)

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    # The flips and the reversed channel slice produce views; make a
    # contiguous copy before handing the array to torch.from_numpy.
    img = np.ascontiguousarray(img)

    # Default to None so the return below never reads unbound locals when the
    # sample carries no mask (previously this path raised at the return).
    mask = weight = None
    if msk is not None:
        # Down-sample by 8 with a max-pool. The original author's note warns
        # that cv2.resize with INTER_AREA may overwhelm small objects.
        msk = block_reduce(msk, (8, 8, 1), np.max)  # MaxPool2D
        msk = np.ascontiguousarray(msk.transpose(2, 0, 1))  # chw
        mask, weight = np.split(msk, 2, axis=0)
        mask, weight = torch.from_numpy(mask), torch.from_numpy(weight)

    # Original author's note: building the targets here via
    #   mask, weight = target2mask(labels_out, img.shape, 1)
    # would be slightly faster than doing it in train.py or test.py, since the
    # dataloader runs asynchronously, but it does not suit multi-scale
    # training because interpolation would corrupt the mask's absolute values.

    return torch.from_numpy(img), labels_out, mask, weight, self.img_files[index], shapes