in src/sagemaker_defect_detection/dataset/neu.py [0:0]
def __getitem__(self, idx: int):
    """Return ``(image, target, image_id)`` for the sample at *idx*.

    The image is loaded with OpenCV and converted to RGB (source images are
    grayscale but the ResNet backbone expects 3 channels). ``target`` follows
    the torchvision detection convention with keys ``boxes``, ``labels``,
    ``image_id``, ``iscrowd`` and ``area``.

    Returns ``None`` when augmentation drops every box (crops that do not
    pass the ``min_visibility`` threshold) — callers/collate functions must
    filter these out.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    record = self.samples[idx]
    # Note: images are grayscaled BUT resnet needs 3 channels
    image = cv2.imread(record.image_path)  # BGR channel last
    if image is None:
        # cv2.imread silently returns None on a missing/corrupt file; fail
        # loudly here instead of with a cryptic error inside cvtColor.
        raise FileNotFoundError(f"cannot read image: {record.image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    boxes = self._get_bboxes(record.annotations)
    num_objs = len(boxes)
    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    # One class index per image; every box in it carries the same label.
    labels = torch.tensor([record.class_idx] * num_objs, dtype=torch.int64)
    image_id = torch.tensor([idx], dtype=torch.int64)

    target = {
        "boxes": boxes,
        "labels": labels,
        "image_id": image_id,
        "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
    }

    if self.augmentation is not None:
        augmented = self.augmentation(image=image, bboxes=boxes, labels=labels)
        image = augmented["image"]
        target["boxes"] = torch.as_tensor(augmented["bboxes"], dtype=torch.float32)
        # guards against crops that don't pass the min_visibility augmentation threshold
        if not target["boxes"].numel():
            return None
        target["labels"] = torch.as_tensor(augmented["labels"], dtype=torch.int64)
        # BUGFIX: iscrowd was sized from the pre-augmentation box count, but
        # augmentation can drop boxes (min_visibility) — resize it so all
        # per-object fields stay aligned.
        target["iscrowd"] = torch.zeros((len(target["boxes"]),), dtype=torch.int64)

    if self.preprocess is not None:
        image = self.preprocess(image=image)["image"]

    boxes = target["boxes"]
    # area = (y2 - y1) * (x2 - x1); assumes (x1, y1, x2, y2) box layout —
    # NOTE(review): convention inferred from the subtraction order, confirm
    # against _get_bboxes.
    target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    return image, target, image_id