in sagemaker/src/htr_dataset.py [0:0]
def __getitem__(self, index):
    """Load one annotated sample and shape it according to ``self.output_type``.

    The image named by the annotation entry is read as grayscale from
    ``self.images_path``, resized to ``self.PAGE_SIZE`` with ``resize_image``,
    and its bounding boxes are mapped into the resized image with
    ``transform_bb_after_resize``. Depending on ``self.output_type``:

    * ``"page"``: the resized page, all transformed boxes and all texts.
    * ``"line"``: the page cropped (via ``self._crop_image``) around the
      ``line_bb`` of the first annotation, with the boxes and texts that
      ``_crop_image`` returns.
    * ``"word"``: the page cropped around the ``bb`` of the first
      annotation; the boxes are replaced by the placeholder
      ``[[0, 0, 1, 1]]``.

    Args:
        index: Position of the sample in ``self.annotations``.

    Returns:
        ``self.transform(image, bbs, texts)`` when a transform is set,
        otherwise the plain tuple ``(image, bbs, texts)``.

    Raises:
        ValueError: If ``self.output_type`` is not "line", "word" or "page".
        OSError: If the image file cannot be read by ``cv2.imread``.
    """
    # Fail early with a clear message instead of an UnboundLocalError on
    # ``transformed_bb`` further down.
    if self.output_type not in ("line", "word", "page"):
        raise ValueError(
            "Unsupported output_type: {!r}".format(self.output_type))

    item = self.annotations[index]
    image_path = os.path.join(self.images_path, item['filename'])
    im = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising.
    if im is None:
        raise OSError("Unable to read image: {}".format(image_path))

    original_image_size = im.shape
    resized_image, border_bb = resize_image(im, desired_size=self.PAGE_SIZE)
    resized_image_size = resized_image.shape

    annotations = item['annotation']
    texts = [annotation['text'] for annotation in annotations]
    bbs = np.array(
        [annotation['bb'] for annotation in annotations]).astype(float)
    # A 3-D array means the boxes carry an extra leading axis; keep only the
    # first entry along it. NOTE(review): presumably a nested list in the
    # annotation data - confirm against the annotation writer.
    if bbs.ndim == 3:
        bbs = bbs[0]
    bbs = self._normalise_bb(bbs, original_image_size)

    # Every output type needs the boxes expressed in the resized image.
    transformed_bb = transform_bb_after_resize(
        bbs, border_bb, original_image_size, resized_image_size)

    if self.output_type == "line":
        line_bb = np.expand_dims(annotations[0]['line_bb'], 0).astype(float)
        line_bb = self._normalise_bb(line_bb, original_image_size)
        transformed_line_bb = transform_bb_after_resize(
            line_bb, border_bb, original_image_size, resized_image_size)
        resized_image, transformed_bb, texts = self._crop_image(
            resized_image, transformed_line_bb, transformed_bb, texts,
            self.LINE_SIZE)
    elif self.output_type == "word":
        word_bb = np.expand_dims(annotations[0]['bb'], 0).astype(float)
        word_bb = self._normalise_bb(word_bb, original_image_size)
        transformed_word_bb = transform_bb_after_resize(
            word_bb, border_bb, original_image_size, resized_image_size)
        resized_image, _, texts = self._crop_image(
            resized_image, transformed_word_bb, transformed_bb, [texts],
            self.WORD_SIZE)
        # Word output_type has no bounding boxes
        transformed_bb = np.array([[0, 0, 1, 1]])
    # "page" needs nothing beyond the shared transformation above.

    if self.transform is not None:
        return self.transform(resized_image, transformed_bb, texts)
    # No transform configured: hand back the raw sample instead of calling
    # None, which previously raised TypeError.
    return resized_image, transformed_bb, texts