in tensorflow_datasets/object_detection/coco.py [0:0]
def _generate_examples(self, image_dir, annotation_dir, split_name,
annotation_type, panoptic_dir):
"""Generate examples as dicts.
Args:
image_dir: `str`, directory containing the images
annotation_dir: `str`, directory containing annotations
split_name: `str`, <split_name><year> (ex: train2014, val2017)
annotation_type: `AnnotationType`, the annotation format (NONE, BBOXES,
PANOPTIC)
panoptic_dir: If annotation_type is PANOPTIC, contains the panoptic image
directory
Yields:
example key and data
"""
if annotation_type == AnnotationType.BBOXES:
instance_filename = 'instances_{}.json'
elif annotation_type == AnnotationType.PANOPTIC:
instance_filename = 'panoptic_{}.json'
elif annotation_type == AnnotationType.NONE: # No annotation for test sets
instance_filename = 'image_info_{}.json'
# Load the annotations (label names, images metadata,...)
instance_path = os.path.join(
annotation_dir,
'annotations',
instance_filename.format(split_name),
)
coco_annotation = ANNOTATION_CLS[annotation_type](instance_path)
# Each category is a dict:
# {
# 'id': 51, # From 1-91, some entry missing
# 'name': 'bowl',
# 'supercategory': 'kitchen',
# }
categories = coco_annotation.categories
# Each image is a dict:
# {
# 'id': 262145,
# 'file_name': 'COCO_train2017_000000262145.jpg'
# 'flickr_url': 'http://farm8.staticflickr.com/7187/xyz.jpg',
# 'coco_url': 'http://images.cocodataset.org/train2017/xyz.jpg',
# 'license': 2,
# 'date_captured': '2013-11-20 02:07:55',
# 'height': 427,
# 'width': 640,
# }
images = coco_annotation.images
# TODO(b/121375022): ClassLabel names should also contains 'id' and
# and 'supercategory' (in addition to 'name')
# Warning: As Coco only use 80 out of the 91 labels, the c['id'] and
# dataset names ids won't match.
if self.builder_config.has_panoptic:
objects_key = 'panoptic_objects'
else:
objects_key = 'objects'
self.info.features[objects_key]['label'].names = [
c['name'] for c in categories
]
# TODO(b/121375022): Conversion should be done by ClassLabel
categories_id2name = {c['id']: c['name'] for c in categories}
# Iterate over all images
annotation_skipped = 0
for image_info in sorted(images, key=lambda x: x['id']):
if annotation_type == AnnotationType.BBOXES:
# Each instance annotation is a dict:
# {
# 'iscrowd': 0,
# 'bbox': [116.95, 305.86, 285.3, 266.03],
# 'image_id': 480023,
# 'segmentation': [[312.29, 562.89, 402.25, ...]],
# 'category_id': 58,
# 'area': 54652.9556,
# 'id': 86,
# }
instances = coco_annotation.get_annotations(img_id=image_info['id'])
elif annotation_type == AnnotationType.PANOPTIC:
# Each panoptic annotation is a dict:
# {
# 'file_name': '000000037777.png',
# 'image_id': 37777,
# 'segments_info': [
# {
# 'area': 353,
# 'category_id': 52,
# 'iscrowd': 0,
# 'id': 6202563,
# 'bbox': [221, 179, 37, 27],
# },
# ...
# ]
# }
panoptic_annotation = coco_annotation.get_annotations(
img_id=image_info['id'])
instances = panoptic_annotation['segments_info']
else:
instances = [] # No annotations
if not instances:
annotation_skipped += 1
def build_bbox(x, y, width, height):
# pylint: disable=cell-var-from-loop
# build_bbox is only used within the loop so it is ok to use image_info
return tfds.features.BBox(
ymin=y / image_info['height'],
xmin=x / image_info['width'],
ymax=(y + height) / image_info['height'],
xmax=(x + width) / image_info['width'],
)
# pylint: enable=cell-var-from-loop
example = {
'image': os.path.join(image_dir, split_name, image_info['file_name']),
'image/filename': image_info['file_name'],
'image/id': image_info['id'],
objects_key: [{ # pylint: disable=g-complex-comprehension
'id': instance['id'],
'area': instance['area'],
'bbox': build_bbox(*instance['bbox']),
'label': categories_id2name[instance['category_id']],
'is_crowd': bool(instance['iscrowd']),
} for instance in instances]
}
if self.builder_config.has_panoptic:
panoptic_filename = panoptic_annotation['file_name']
panoptic_image_path = os.path.join(panoptic_dir, panoptic_filename)
example['panoptic_image'] = panoptic_image_path
example['panoptic_image/filename'] = panoptic_filename
yield image_info['file_name'], example
logging.info(
'%d/%d images do not contains any annotations',
annotation_skipped,
len(images),
)