def process_images()

in visualization/visualize_db.py [0:0]
186 lines of code
31 McCabe index (conditional complexity)

def process_images(db_path, output_dir, image_base_dir, options=None):
    """
    Writes images and html to output_dir to visualize the annotations in the json file
    db_path.
    
    db_path can also be a previously-loaded database.
    
    Returns the html filename and the database:
        
    return htmlOutputFile,image_db
    """    
    
    if options is None:
        options = DbVizOptions()
    
    print(options.__dict__)
    
    if image_base_dir.startswith('http'):
        if not image_base_dir.endswith('/'):
            image_base_dir += '/'
    else:
        assert(os.path.isdir(image_base_dir))
            
    os.makedirs(os.path.join(output_dir, 'rendered_images'), exist_ok=True)
    
    if isinstance(db_path,str):
        assert(os.path.isfile(db_path))    
        print('Loading database from {}...'.format(db_path))
        image_db = json.load(open(db_path))
        print('...done')
    elif isinstance(db_path,dict):
        print('Using previously-loaded DB')
        image_db = db_path
    else:
        raise ValueError('Illegal dictionary or filename')    
        
    annotations = image_db['annotations']
    images = image_db['images']
    categories = image_db['categories']
    
    # Optionally remove all images without bounding boxes, *before* sampling
    if options.trim_to_images_with_bboxes:
        
        bHasBbox = [False] * len(annotations)
        for iAnn,ann in enumerate(annotations):
            if 'bbox' in ann:
                assert isinstance(ann['bbox'],list)
                bHasBbox[iAnn] = True
        annotationsWithBboxes = list(compress(annotations, bHasBbox))
        
        imageIDsWithBboxes = [x['image_id'] for x in annotationsWithBboxes]
        imageIDsWithBboxes = set(imageIDsWithBboxes)
        
        bImageHasBbox = [False] * len(images)
        for iImage,image in enumerate(images):
            imageID = image['id']
            if imageID in imageIDsWithBboxes:
                bImageHasBbox[iImage] = True
        imagesWithBboxes = list(compress(images, bImageHasBbox))
        images = imagesWithBboxes
                
    # Optionally include/remove images with specific labels, *before* sampling
        
    assert (not ((options.classes_to_exclude is not None) and (options.classes_to_include is not None))), \
        'Cannot specify an inclusion and exclusion list'
        
    if (options.classes_to_exclude is not None) or (options.classes_to_include is not None):
     
        print('Indexing database')
        indexed_db = IndexedJsonDb(image_db)
        bValidClass = [True] * len(images)        
        for iImage,image in enumerate(images):
            classes = indexed_db.get_classes_for_image(image)
            if options.classes_to_exclude is not None:
                for excludedClass in options.classes_to_exclude:
                    if excludedClass in classes:
                       bValidClass[iImage] = False
                       break
            elif options.classes_to_include is not None:
                bValidClass[iImage] = False
                if options.multiple_categories_tag in options.classes_to_include:
                    if len(classes) > 1:
                        bValidClass[iImage] = True        
                if not bValidClass[iImage]:
                    for c in classes:
                        if c in options.classes_to_include:
                            bValidClass[iImage] = True
                            break                        
            else:
                raise ValueError('Illegal include/exclude combination')
                
        imagesWithValidClasses = list(compress(images, bValidClass))
        images = imagesWithValidClasses    
    
    
    # Put the annotations in a dataframe so we can select all annotations for a given image
    print('Creating data frames')
    df_anno = pd.DataFrame(annotations)
    df_img = pd.DataFrame(images)
    
    # Construct label map
    label_map = {}
    for cat in categories:
        label_map[int(cat['id'])] = cat['name']
    
    # Take a sample of images
    if options.num_to_visualize is not None:
        df_img = df_img.sample(n=options.num_to_visualize,random_state=options.random_seed)
    
    images_html = []
    
    # Set of dicts representing inputs to render_db_bounding_boxes:
    #
    # bboxes, boxClasses, image_path
    rendering_info = []
    
    print('Preparing rendering list')
    # iImage = 0
    for iImage in tqdm(range(len(df_img))):
        
        img = df_img.iloc[iImage]
        
        img_id = img['id']
        img_relative_path = img['file_name']
        
        if image_base_dir.startswith('http'):
            img_path = image_base_dir + img_relative_path
        else:
            img_path = os.path.join(image_base_dir, image_filename_to_path(img_relative_path, image_base_dir))
    
        annos_i = df_anno.loc[df_anno['image_id'] == img_id, :]  # all annotations on this image
    
        bboxes = []
        boxClasses = []
        
        # All the class labels we've seen for this image (with out without bboxes)
        imageCategories = set()
        
        annotationLevelForImage = ''
        
        # Iterate over annotations for this image
        # iAnn = 0; anno = annos_i.iloc[iAnn]
        for iAnn,anno in annos_i.iterrows():
        
            if 'sequence_level_annotation' in anno:
                bSequenceLevelAnnotation = anno['sequence_level_annotation']
                if bSequenceLevelAnnotation:
                    annLevel = 'sequence'
                else:
                    annLevel = 'image'
                if annotationLevelForImage == '':
                    annotationLevelForImage = annLevel
                elif annotationLevelForImage != annLevel:
                    annotationLevelForImage = 'mixed'
                    
            categoryID = anno['category_id']
            categoryName = label_map[categoryID]
            if options.add_search_links:
                categoryName = categoryName.replace('"','')
                categoryName = '<a href="https://www.bing.com/images/search?q={}">{}</a>'.format(categoryName,categoryName)
            imageCategories.add(categoryName)
            
            if 'bbox' in anno:
                bbox = anno['bbox']        
                if isinstance(bbox,float):
                    assert math.isnan(bbox), "I shouldn't see a bbox that's neither a box nor NaN"
                    continue
                bboxes.append(bbox)
                boxClasses.append(anno['category_id'])
        
        imageClasses = ', '.join(imageCategories)
                
        file_name = '{}_gt.jpg'.format(img_id.lower().split('.jpg')[0])
        file_name = file_name.replace('/', '~').replace('\\','~').replace(':','~')
        
        rendering_info.append({'bboxes':bboxes, 'boxClasses':boxClasses, 'img_path':img_path,
                               'output_file_name':file_name})
                
        labelLevelString = ''
        if len(annotationLevelForImage) > 0:
            labelLevelString = ' (annotation level: {})'.format(annotationLevelForImage)
            
        if 'frame_num' in img and 'seq_num_frames' in img:
            frameString = ' frame: {} of {}, '.format(img['frame_num'],img['seq_num_frames'])
        else:
            frameString = ' (no sequence information available)'
        
        filename_text = img_relative_path
        if options.include_filename_links:
            filename_text = '<a href="{}">{}</a>'.format(img_path,img_relative_path)
            
        # We're adding html for an image before we render it, so it's possible this image will
        # fail to render.  For applications where this script is being used to debua a database
        # (the common case?), this is useful behavior, for other applications, this is annoying.
        #
        # TODO: optionally write html only for images where rendering succeeded
        images_html.append({
            'filename': '{}/{}'.format('rendered_images', file_name),
            'title': '{}<br/>{}, num boxes: {}, {}class labels: {}{}'.format(
                filename_text, img_id, len(bboxes), frameString, imageClasses, labelLevelString),
            'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
        })
    
    # ...for each image

    def render_image_info(rendering_info):
        
        img_path = rendering_info['img_path']
        bboxes = rendering_info['bboxes']
        bboxClasses = rendering_info['boxClasses']
        output_file_name = rendering_info['output_file_name']
        
        if not img_path.startswith('http'):
            if not os.path.exists(img_path):
                print('Image {} cannot be found'.format(img_path))
                return False
            
        try:
            original_image = vis_utils.open_image(img_path)
            original_size = original_image.size
            image = vis_utils.resize_image(original_image, options.viz_size[0], options.viz_size[1])
        except Exception as e:
            print('Image {} failed to open. Error: {}'.format(img_path, e))
            return False
            
        vis_utils.render_db_bounding_boxes(boxes=bboxes, classes=bboxClasses,
                                           image=image, original_size=original_size,
                                           label_map=label_map)
        image.save(os.path.join(output_dir, 'rendered_images', output_file_name))
        return True
    
    # ...def render_image_info
    
    print('Rendering images')
    start_time = time.time()
    if options.parallelize_rendering:
        if options.parallelize_rendering_n_cores is None:
            pool = ThreadPool()
        else:
            print('Rendering images with {} workers'.format(options.parallelize_rendering_n_cores))
            pool = ThreadPool(options.parallelize_rendering_n_cores)
        rendering_success = tqdm(list(pool.imap(render_image_info, rendering_info)), total=len(rendering_info))
    else:
        rendering_success = []
        for file_info in tqdm(rendering_info):        
            rendering_success.append(render_image_info(file_info))
    elapsed = time.time() - start_time
    
    print('Rendered {} images in {} ({} successful)'.format(
        len(rendering_info),humanfriendly.format_timespan(elapsed),sum(rendering_success)))
        
    if options.sort_by_filename:    
        images_html = sorted(images_html, key=lambda x: x['filename'])
        
    htmlOutputFile = os.path.join(output_dir, 'index.html')
    
    htmlOptions = options.htmlOptions
    if isinstance(db_path,str):
        htmlOptions['headerHtml'] = '<h1>Sample annotations from {}</h1>'.format(db_path)
    else:
        htmlOptions['headerHtml'] = '<h1>Sample annotations</h1>'
        
    write_html_image_list(
            filename=htmlOutputFile,
            images=images_html,
            options=htmlOptions)

    print('Visualized {} images, wrote results to {}'.format(len(images_html),htmlOutputFile))
    
    return htmlOutputFile,image_db