def convert_abstract_to_coco()

in maskrcnn_benchmark/data/datasets/evaluation/coco/abs_to_coco.py

import logging
from datetime import datetime

import torch
from tqdm import tqdm

# NOTE: import path assumed; process_single_image is defined alongside this
# function in the same module (abs_to_coco.py), so it needs no import here.
from maskrcnn_benchmark.data.datasets.abstract import AbstractDataset

def convert_abstract_to_coco(dataset, num_workers=None, chunksize=100):
    """
    Convert any dataset derived from AbstractDataset to COCO style
    for evaluating with the pycocotools lib

    Conversion imitates required fields of COCO instance segmentation
    ground truth files like: ".../annotations/instances_train2014.json"

    After the conversion is done, a dict is returned that follows the same
    format as COCO JSON files.

    By default, .coco_eval_wrapper.py saves it to disk in JSON format
    and loads it with maskrcnn_benchmark's default COCODataset.

    Args:
        dataset: any dataset derived from AbstractDataset
        num_workers (optional): number of worker processes to parallelize the
            conversion (default is to use all cores)
        chunksize (optional): how many entries each worker processes before
            requesting a new task. Larger values mean less scheduling overhead.
    """

    logger = logging.getLogger("maskrcnn_benchmark.inference")
    assert isinstance(dataset, AbstractDataset)
    # Official COCO annotations have these fields
    # 'info', 'licenses', 'images', 'type', 'annotations', 'categories'
    coco_dict = {}
    coco_dict["info"] = {
        "description": (
            "This is an automatically generated COCO annotation"
            " file using maskrcnn_benchmark"
        ),
        "date_created": "%s" % datetime.now(),
    }
    coco_dict["type"] = "instances"

    images = []
    annotations = []
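    # Each worker yields one COCO-style image record (e.g. "id", "file_name",
    # "width", "height") paired with that image's annotation dicts
    # ("image_id", "category_id", "bbox", "segmentation", "area", "iscrowd");
    # the per-annotation "id" field is assigned after the pool finishes.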

    if num_workers is None:
        num_workers = torch.multiprocessing.cpu_count()
    else:
        num_workers = min(num_workers, torch.multiprocessing.cpu_count())

    dataset_name = dataset.__class__.__name__
    num_images = len(dataset)
    logger.info(
        (
            "Parsing each entry in "
            "%s, total=%d. "
            "Using N=%d workers and chunksize=%d"
        )
        % (dataset_name, num_images, num_workers, chunksize)
    )

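    # Pool.imap (unlike imap_unordered) yields results in submission order,
    # so `images` stays aligned with the dataset's indices.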
    with torch.multiprocessing.Pool(num_workers) as pool:
        with tqdm(total=num_images) as progress_bar:
            args = [(dataset, idx) for idx in range(num_images)]
            iterator = pool.imap(process_single_image, args, chunksize=chunksize)
            for img_annots_pair in iterator:
                image, per_img_annotations = img_annots_pair

                images.append(image)
                annotations.extend(per_img_annotations)
                progress_bar.update()

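    # Give every annotation a unique, 1-based id (COCO convention). Ids are
    # assigned here because the worker processes cannot share a global counter.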
    for ann_id, ann in enumerate(annotations, 1):
        ann["id"] = ann_id

    logger.info("Parsing categories:")
    # CATEGORY DATA
    categories = [
        {"id": category_id, "name": name}
        for category_id, name in dataset.id_to_name.items()
        if name != "__background__"
    ]
    # Logging categories
    for cat in categories:
        logger.info(str(cat))

    coco_dict["images"] = images
    coco_dict["annotations"] = annotations
    coco_dict["categories"] = categories
    return coco_dict
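
A minimal usage sketch (the dataset class, worker count, and output path
below are placeholders for illustration, not part of the API):

    import json

    dataset = MyDataset(...)  # any AbstractDataset subclass
    coco_dict = convert_abstract_to_coco(dataset, num_workers=8)
    with open("instances_mydataset.json", "w") as f:
        json.dump(coco_dict, f)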