def __getitem__()

in sagemaker/src/htr_dataset.py [0:0]


    def __getitem__(self, index):
        """Return one dataset sample: (image, bounding boxes, texts).

        Loads the grayscale image for the annotation at *index*, resizes it
        to ``self.PAGE_SIZE``, normalises the word bounding boxes, and —
        depending on ``self.output_type`` — optionally crops down to a
        single line ("line") or single word ("word"); "page" keeps the
        whole resized page.

        Parameters
        ----------
        index : int
            Index into ``self.annotations``.

        Returns
        -------
        tuple
            ``self.transform(image, bbs, texts)`` when a transform is set,
            otherwise the raw ``(image, bbs, texts)`` triple.

        Raises
        ------
        ValueError
            If ``self.output_type`` is not "line", "word", or "page".
        """
        item = self.annotations[index]

        im = cv2.imread(os.path.join(self.images_path, item['filename']),
                        cv2.IMREAD_GRAYSCALE)
        original_image_size = im.shape
        resized_image, border_bb = resize_image(im, desired_size=self.PAGE_SIZE)
        resized_image_size = resized_image.shape

        annotations = item['annotation']
        texts = [annotation['text'] for annotation in annotations]
        bbs = np.array([annotation['bb'] for annotation in annotations]).astype(float)
        # Some annotation entries nest the bb array one level deep; unwrap.
        if len(bbs.shape) == 3:
            bbs = bbs[0]
        bbs = self._normalise_bb(bbs, original_image_size)

        if self.output_type == "line":
            transformed_bb = transform_bb_after_resize(
                bbs, border_bb, original_image_size, resized_image_size)

            # NOTE(review): only the first annotation's line_bb is used —
            # presumably each item holds a single line; confirm upstream.
            line_bb = np.expand_dims(annotations[0]['line_bb'], 0).astype(float)
            line_bb = self._normalise_bb(line_bb, original_image_size)

            transformed_line_bb = transform_bb_after_resize(
                line_bb, border_bb, original_image_size, resized_image_size)
            resized_image, transformed_bb, texts = self._crop_image(
                resized_image, transformed_line_bb, transformed_bb, texts, self.LINE_SIZE)

        elif self.output_type == "word":
            transformed_bb = transform_bb_after_resize(
                bbs, border_bb, original_image_size, resized_image_size)

            word_bb = np.expand_dims(annotations[0]['bb'], 0).astype(float)
            word_bb = self._normalise_bb(word_bb, original_image_size)

            transformed_word_bb = transform_bb_after_resize(
                word_bb, border_bb, original_image_size, resized_image_size)

            resized_image, _, texts = self._crop_image(
                resized_image, transformed_word_bb, transformed_bb, [texts], self.WORD_SIZE)

            # Word output_type has no bounding boxes: use a single full-image box.
            transformed_bb = np.array([[0, 0, 1, 1]])

        elif self.output_type == "page":
            transformed_bb = transform_bb_after_resize(
                bbs, border_bb, original_image_size, resized_image_size)

        else:
            # Previously an unknown output_type fell through and crashed
            # later with UnboundLocalError on transformed_bb; fail clearly.
            raise ValueError(f"Unknown output_type: {self.output_type!r}")

        if self.transform is not None:
            return self.transform(resized_image, transformed_bb, texts)
        # BUG FIX: the original also called self.transform(...) here, which
        # raised TypeError ('NoneType' is not callable). Return the raw triple.
        return resized_image, transformed_bb, texts