def make_cct_embedded()

in data_management/megadb/converters/cct_to_megadb.py [0:0]
73 lines of code
23 McCabe index (conditional complexity)

def make_cct_embedded(image_db=None, bbox_db=None):
    """
    Takes in path to the COCO Camera Trap format jsons for images (species labels) and/or
    bboxes (animal/human/vehicle) labels and embed the class names and annotations into the image entries.

    Since IndexedJsonDb() can take either a path or a loaded json object as a dict, both
    arguments can be paths or loaded json objects

    Returns:
        an embedded version of the COCO Camera Trap format json database
    """


    # at first a dict of image_id: image_obj with annotations embedded, then it becomes
    # an array of image objects
    docs = {}

    # %% integrate the image DB
    if image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            docs[image_id]['annotations'] = {
                'species': []
            }
            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1
            for anno in annotations:
                # convert the species category to explicit string name
                cat_name = cct_json_db.cat_id_to_name[anno['category_id']]
                docs[image_id]['annotations']['species'].append(cat_name)

                # there may be other fields in the annotation object
                for anno_field_name, anno_field_val in anno.items():
                    # these fields should already be gotten from the image object
                    if anno_field_name not in ['category_id', 'id', 'image_id', 'datetime', 'location', 'sequence_level_annotation', 'seq_id', 'seq_num_frames', 'frame_num']:
                        docs[image_id]['annotations'][anno_field_name] = anno_field_val

        print('Number of items from the image DB:', len(docs))
        print('Number of images with more than 1 species: {} ({}% of image DB)'.format(
            num_images_with_more_than_1_species, round(100 * num_images_with_more_than_1_species / len(docs), 2)))

    #%% integrate the bbox DB
    if bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                docs[image_id] = image_obj
                num_added += 1

            amended = False
            for field_name, val in image_obj.items():
                if field_name not in docs[image_id]:
                    docs[image_id][field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0

        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():

            # for any newly added images
            if 'annotations' not in docs[image_id]:
                docs[image_id]['annotations'] = {}

            docs[image_id]['annotations']['bbox'] = []

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            for bbox_anno in bbox_annotations:
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                    # 'bbox_abs': bbox_anno['bbox'],
                }
                if 'bbox' in bbox_anno:
                    if 'width' in docs[image_id]:
                        image_w = docs[image_id]['width']
                        image_h = docs[image_id]['height']
                        x, y, w, h = bbox_anno['bbox']
                        item_bbox['bbox_rel'] = [
                            truncate_float(x / image_w),
                            truncate_float(y / image_h),
                            truncate_float(w / image_w),
                            truncate_float(h / image_h)
                        ]

                    docs[image_id]['annotations']['bbox'].append(item_bbox)

            # not keeping height and width
            del docs[image_id]['width']
            del docs[image_id]['height']

        print('Number of images with more than one bounding box: {} ({}% of all entries)'.format(
            num_more_than_1_bbox, 100 * num_more_than_1_bbox / len(docs), 2))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, 'No image entries found in the image or bbox DB jsons provided.'

    docs = list(docs.values())
    return docs