in data_management/megadb/converters/cct_to_megadb.py [0:0]
def make_cct_embedded(image_db=None, bbox_db=None):
    """
    Embed class names and annotations from COCO Camera Trap (CCT) format databases
    into their image entries.

    Takes in the COCO Camera Trap format jsons for images (species labels) and/or
    bboxes (animal/human/vehicle labels). Since IndexedJsonDb() can take either a path
    or a loaded json object as a dict, both arguments can be paths or loaded json objects.

    Args:
        image_db: path to, or loaded dict of, the CCT json with image-level (species)
            labels. Skipped if falsy.
        bbox_db: path to, or loaded dict of, the CCT json with bounding box labels.
            Skipped if falsy.

    Returns:
        a list of image objects. Entries that have annotations carry an 'annotations'
        dict holding 'species' (list of category names, from the image DB) and/or
        'bbox' (list of dicts with 'category' and, when the image width and height are
        known, 'bbox_rel' as [x, y, w, h] divided by the image dimensions). 'width' and
        'height' are removed from entries that have bbox annotations.

    Raises:
        AssertionError: if neither DB contributed any image entry.

    Note:
        The image objects held by the IndexedJsonDb instances are modified in place.
        NOTE(review): whether a dict passed in by the caller is also modified depends
        on IndexedJsonDb - confirm before reusing the inputs.
    """
    # at first a dict of image_id: image_obj with annotations embedded, then it becomes
    # an array of image objects
    docs = {}

    # fields of an image-level annotation that are either consumed here or should
    # already be gotten from the image object, so they are not copied over
    skipped_anno_fields = frozenset([
        'category_id', 'id', 'image_id', 'datetime', 'location',
        'sequence_level_annotation', 'seq_id', 'seq_num_frames', 'frame_num',
    ])

    # %% integrate the image DB
    if image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            embedded = {'species': []}
            docs[image_id]['annotations'] = embedded

            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1

            for anno in annotations:
                # convert the species category to explicit string name
                embedded['species'].append(cct_json_db.cat_id_to_name[anno['category_id']])

                # there may be other fields in the annotation object; with several
                # annotations on one image, the value from the last one wins
                for anno_field_name, anno_field_val in anno.items():
                    if anno_field_name not in skipped_anno_fields:
                        embedded[anno_field_name] = anno_field_val

        # guard the division so that an empty image DB reaches the descriptive
        # assertion at the end instead of failing with ZeroDivisionError here
        percent_multi_species = (
            round(100 * num_images_with_more_than_1_species / len(docs), 2) if docs else 0.0
        )
        print('Number of items from the image DB:', len(docs))
        print('Number of images with more than 1 species: {} ({}% of image DB)'.format(
            num_images_with_more_than_1_species, percent_multi_species))

    # %% integrate the bbox DB
    if bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                docs[image_id] = image_obj
                num_added += 1

            doc = docs[image_id]
            amended = False
            for field_name, val in image_obj.items():
                if field_name not in doc:
                    doc[field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0
        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():
            doc = docs[image_id]

            # for any newly added images
            if 'annotations' not in doc:
                doc['annotations'] = {}
            bboxes = []
            doc['annotations']['bbox'] = bboxes

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            # relative coordinates need both image dimensions
            has_dimensions = 'width' in doc and 'height' in doc

            for bbox_anno in bbox_annotations:
                # only the category and the relative box are stored; the absolute
                # pixel box is not carried over
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                }

                if 'bbox' in bbox_anno and has_dimensions:
                    image_w = doc['width']
                    image_h = doc['height']
                    x, y, w, h = bbox_anno['bbox']
                    item_bbox['bbox_rel'] = [
                        truncate_float(x / image_w),
                        truncate_float(y / image_h),
                        truncate_float(w / image_w),
                        truncate_float(h / image_h),
                    ]

                bboxes.append(item_bbox)

            # not keeping height and width; pop() tolerates entries that never had them
            doc.pop('width', None)
            doc.pop('height', None)

        # round to 2 decimals like the species statistic above, and guard the
        # division for a bbox DB that contributed no images
        percent_multi_bbox = round(100 * num_more_than_1_bbox / len(docs), 2) if docs else 0.0
        print('Number of images with more than one bounding box: {} ({}% of all entries)'.format(
            num_more_than_1_bbox, percent_multi_bbox))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, 'No image entries found in the image or bbox DB jsons provided.'

    docs = list(docs.values())
    return docs