def break_into_images_annotations()

in data_management/megadb/converters/megadb_to_cct.py [0:0]


def break_into_images_annotations(mega_db, bbox_only):
    """Flatten MegaDB sequence entries into image and annotation dict lists.

    Each entry of `mega_db` is a sequence dict that must contain 'seq_id' and
    'images'; every item of 'images' must contain 'file'. Only images that
    have a 'bbox' key are emitted; the others are counted and the count is
    printed at the end.

    For every emitted image:
      * an image dict is built with 'id' (the image's 'image_id' if present,
        otherwise a fresh UUID string) and 'file_name' (the image's 'file');
      * unless the sequence id starts with 'dummy_', 'seq_id',
        'seq_num_frames' and (if present on the image) 'frame_num' are added;
      * remaining sequence-level and image-level properties are copied onto
        the image dict;
      * one annotation dict is appended per sequence-level class, per
        image-level class not already listed at the sequence level, and per
        bounding box. Class annotations are omitted when `bbox_only` is truthy.

    Annotations carry a 'category_name' rather than a numeric category id.

    Args:
        mega_db: iterable of sequence dicts as described above.
        bbox_only: if truthy, emit only bounding-box annotations.

    Returns:
        A tuple `(cct_images, cct_annotations)` of two lists of dicts.

    Raises:
        AssertionError: if a sequence lacks 'seq_id' / 'images', or an image
            lacks 'file'.
    """
    # Sequence-level keys that are handled explicitly or must be left out;
    # `id` comes from the database.
    seq_keys_handled = ('seq_id', 'images', 'class', 'id')
    # Image-level keys that are already added under another name or must be
    # left out (e.g. 'id').
    im_keys_handled = ('file', 'frame_num', 'id', 'file_name')

    cct_images = []
    cct_annotations = []
    num_images_skipped = 0

    for seq in tqdm(mega_db):
        assert 'seq_id' in seq and 'images' in seq, \
            'sequence entry is missing "seq_id" or "images"'
        for im in seq['images']:
            assert 'file' in im, \
                'image in sequence {} is missing "file"'.format(seq['seq_id'])

        seq_level_classes = seq.get('class', [])

        # Everything else on the sequence is propagated to its images, except
        # attributes starting with `_` (these also come from the database).
        seq_level_props = {
            prop_name: prop_val
            for prop_name, prop_val in seq.items()
            if prop_name not in seq_keys_handled
            and not prop_name.startswith('_')
        }

        # Sequences whose id starts with 'dummy_' carry no useful sequence
        # information, so sequence fields are not attached to their images.
        seq_info_available = not seq['seq_id'].startswith('dummy_')
        seq_num_frames = len(seq['images']) if seq_info_available else None

        for im in seq['images']:
            # NOTE(review): images without a 'bbox' key are skipped regardless
            # of `bbox_only` - confirm this is intended.
            if 'bbox' not in im:
                num_images_skipped += 1
                continue

            # required fields for an image object
            im_object = {
                'id': im['image_id'] if 'image_id' in im else str(uuid.uuid1()),
                'file_name': im['file'],
            }

            if seq_info_available:
                im_object['seq_id'] = seq['seq_id']
                im_object['seq_num_frames'] = seq_num_frames
                if 'frame_num' in im:
                    im_object['frame_num'] = im['frame_num']

            # add seq-level class labels for this image
            if not bbox_only:
                for cls in seq_level_classes:
                    cct_annotations.append({
                        'id': str(uuid.uuid1()),
                        'image_id': im_object['id'],
                        'sequence_level_annotation': True,
                        'category_name': cls,  # later converted to category_id
                    })

            # add other sequence-level properties to each image too
            im_object.update(seq_level_props)

            # add other image-level properties, in the image dict's key order
            for im_prop, im_val in im.items():
                if im_prop in im_keys_handled:
                    continue

                if im_prop == 'class':
                    # image-level "species" labels; not the bbox type labels
                    if bbox_only:
                        continue
                    for cls in im_val:
                        # skip labels already emitted at the sequence level
                        if cls not in seq_level_classes:
                            cct_annotations.append({
                                'id': str(uuid.uuid1()),
                                'image_id': im_object['id'],
                                'category_name': cls,  # later converted to category_id
                            })
                elif im_prop == 'bbox':
                    for box in im_val:
                        cct_annotations.append({
                            'id': str(uuid.uuid1()),
                            'image_id': im_object['id'],
                            'category_name': box['category'],
                            'bbox': box['bbox'],
                        })
                else:
                    im_object[im_prop] = im_val

            cct_images.append(im_object)
        # ... for im in seq['images']
    # ... for seq in mega_db

    print('Number of empty images: {}'.format(num_images_skipped))
    return cct_images, cct_annotations