in maskrcnn_benchmark/data/datasets/evaluation/coco/abs_to_coco.py [0:0]
def convert_abstract_to_coco(dataset, num_workers=None, chunksize=100):
    """
    Convert any dataset derived from AbstractDataset to COCO style
    for evaluating with the pycocotools lib

    Conversion imitates required fields of COCO instance segmentation
    ground truth files like: ".../annotations/instances_train2014.json"

    After the conversion is done a dict is returned that follows the same
    format as COCO json files.

    By default .coco_eval_wrapper.py saves it to the hard-drive in json format
    and loads it with the maskrcnn_benchmark's default COCODataset

    Args:
        dataset: any dataset derived from AbstractDataset
        num_workers (optional): number of worker processes to parallelize the
            conversion (default is to use all cores for conversion)
        chunksize (optional): how many entries one worker processes before
            requesting a new task. The larger, the less overhead there is.

    Returns:
        dict with keys 'info', 'type', 'images', 'annotations', 'categories'
        following the COCO instance annotation format.
    """
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    assert isinstance(dataset, AbstractDataset)
    # Official COCO annotations have these fields
    # 'info', 'licenses', 'images', 'type', 'annotations', 'categories'
    coco_dict = {}
    coco_dict["info"] = {
        "description": (
            "This is an automatically generated COCO annotation"
            " file using maskrcnn_benchmark"
        ),
        "date_created": "%s" % datetime.now(),
    }
    coco_dict["type"] = "instances"

    images = []
    annotations = []

    # Cap the worker count at the number of available cores.
    if num_workers is None:
        num_workers = torch.multiprocessing.cpu_count()
    else:
        num_workers = min(num_workers, torch.multiprocessing.cpu_count())

    dataset_name = dataset.__class__.__name__
    num_images = len(dataset)
    logger.info(
        (
            "Parsing each entry in "
            "%s, total=%d. "
            "Using N=%d workers and chunksize=%d"
        )
        % (dataset_name, num_images, num_workers, chunksize)
    )

    with torch.multiprocessing.Pool(num_workers) as pool:
        with tqdm(total=num_images) as progress_bar:
            args = [(dataset, idx) for idx in range(num_images)]
            # BUGFIX: forward the user-supplied `chunksize` instead of the
            # hard-coded literal 100 that silently ignored the parameter.
            iterator = pool.imap(process_single_image, args, chunksize=chunksize)
            for image, per_img_annotations in iterator:
                images.append(image)
                annotations.extend(per_img_annotations)
                progress_bar.update()

    # Annotation ids must be unique across the dataset; COCO uses 1-based ids.
    for ann_id, ann in enumerate(annotations, 1):
        ann["id"] = ann_id

    logger.info("Parsing categories:")
    # CATEGORY DATA: keep (id, name) pairs, dropping the background class.
    categories = [
        {"id": category_id, "name": name}
        for category_id, name in dataset.id_to_name.items()
        if name != "__background__"
    ]
    # Logging categories
    for cat in categories:
        logger.info(str(cat))

    coco_dict["images"] = images
    coco_dict["annotations"] = annotations
    coco_dict["categories"] = categories
    return coco_dict