def _generate_examples()

in tensorflow_datasets/object_detection/coco.py [0:0]
66 lines of code
8 McCabe index (conditional complexity)

  def _generate_examples(self, image_dir, annotation_dir, split_name,
                         annotation_type, panoptic_dir):
    """Generate examples as dicts.

    Args:
      image_dir: `str`, directory containing the images
      annotation_dir: `str`, directory containing annotations
      split_name: `str`, <split_name><year> (ex: train2014, val2017)
      annotation_type: `AnnotationType`, the annotation format (NONE, BBOXES,
        PANOPTIC)
      panoptic_dir: If annotation_type is PANOPTIC, contains the panoptic image
        directory

    Yields:
      example key and data
    """

    if annotation_type == AnnotationType.BBOXES:
      instance_filename = 'instances_{}.json'
    elif annotation_type == AnnotationType.PANOPTIC:
      instance_filename = 'panoptic_{}.json'
    elif annotation_type == AnnotationType.NONE:  # No annotation for test sets
      instance_filename = 'image_info_{}.json'

    # Load the annotations (label names, images metadata,...)
    instance_path = os.path.join(
        annotation_dir,
        'annotations',
        instance_filename.format(split_name),
    )
    coco_annotation = ANNOTATION_CLS[annotation_type](instance_path)
    # Each category is a dict:
    # {
    #    'id': 51,  # From 1-91, some entry missing
    #    'name': 'bowl',
    #    'supercategory': 'kitchen',
    # }
    categories = coco_annotation.categories
    # Each image is a dict:
    # {
    #     'id': 262145,
    #     'file_name': 'COCO_train2017_000000262145.jpg'
    #     'flickr_url': 'http://farm8.staticflickr.com/7187/xyz.jpg',
    #     'coco_url': 'http://images.cocodataset.org/train2017/xyz.jpg',
    #     'license': 2,
    #     'date_captured': '2013-11-20 02:07:55',
    #     'height': 427,
    #     'width': 640,
    # }
    images = coco_annotation.images

    # TODO(b/121375022): ClassLabel names should also contains 'id' and
    # and 'supercategory' (in addition to 'name')
    # Warning: As Coco only use 80 out of the 91 labels, the c['id'] and
    # dataset names ids won't match.
    if self.builder_config.has_panoptic:
      objects_key = 'panoptic_objects'
    else:
      objects_key = 'objects'
    self.info.features[objects_key]['label'].names = [
        c['name'] for c in categories
    ]
    # TODO(b/121375022): Conversion should be done by ClassLabel
    categories_id2name = {c['id']: c['name'] for c in categories}

    # Iterate over all images
    annotation_skipped = 0
    for image_info in sorted(images, key=lambda x: x['id']):
      if annotation_type == AnnotationType.BBOXES:
        # Each instance annotation is a dict:
        # {
        #     'iscrowd': 0,
        #     'bbox': [116.95, 305.86, 285.3, 266.03],
        #     'image_id': 480023,
        #     'segmentation': [[312.29, 562.89, 402.25, ...]],
        #     'category_id': 58,
        #     'area': 54652.9556,
        #     'id': 86,
        # }
        instances = coco_annotation.get_annotations(img_id=image_info['id'])
      elif annotation_type == AnnotationType.PANOPTIC:
        # Each panoptic annotation is a dict:
        # {
        #     'file_name': '000000037777.png',
        #     'image_id': 37777,
        #     'segments_info': [
        #         {
        #             'area': 353,
        #             'category_id': 52,
        #             'iscrowd': 0,
        #             'id': 6202563,
        #             'bbox': [221, 179, 37, 27],
        #         },
        #         ...
        #     ]
        # }
        panoptic_annotation = coco_annotation.get_annotations(
            img_id=image_info['id'])
        instances = panoptic_annotation['segments_info']
      else:
        instances = []  # No annotations

      if not instances:
        annotation_skipped += 1

      def build_bbox(x, y, width, height):
        # pylint: disable=cell-var-from-loop
        # build_bbox is only used within the loop so it is ok to use image_info
        return tfds.features.BBox(
            ymin=y / image_info['height'],
            xmin=x / image_info['width'],
            ymax=(y + height) / image_info['height'],
            xmax=(x + width) / image_info['width'],
        )
        # pylint: enable=cell-var-from-loop

      example = {
          'image': os.path.join(image_dir, split_name, image_info['file_name']),
          'image/filename': image_info['file_name'],
          'image/id': image_info['id'],
          objects_key: [{   # pylint: disable=g-complex-comprehension
              'id': instance['id'],
              'area': instance['area'],
              'bbox': build_bbox(*instance['bbox']),
              'label': categories_id2name[instance['category_id']],
              'is_crowd': bool(instance['iscrowd']),
          } for instance in instances]
      }
      if self.builder_config.has_panoptic:
        panoptic_filename = panoptic_annotation['file_name']
        panoptic_image_path = os.path.join(panoptic_dir, panoptic_filename)
        example['panoptic_image'] = panoptic_image_path
        example['panoptic_image/filename'] = panoptic_filename

      yield image_info['file_name'], example

    logging.info(
        '%d/%d images do not contains any annotations',
        annotation_skipped,
        len(images),
    )