evaluation/tiny_benchmark/tools/cityscapes/convert_cityscapes_to_coco.py (175 lines of code) (raw):
#!/usr/bin/env python
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
# This file is copy from https://github.com/facebookresearch/Detectron/tree/master/tools
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import h5py
import json
import os
import scipy.misc
import sys
import cityscapesscripts.evaluation.instances2dict_with_polygons as cs
def parse_args():
parser = argparse.ArgumentParser(description='Convert dataset')
parser.add_argument(
'--dataset', help="cocostuff, cityscapes", default=None, type=str)
parser.add_argument(
'--outdir', help="output dir for json files", default=None, type=str)
parser.add_argument(
'--datadir', help="data dir for annotations to be converted",
default=None, type=str)
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
return parser.parse_args()
def poly_to_box(poly):
"""Convert a polygon into a tight bounding box."""
x0 = min(min(p[::2]) for p in poly)
x1 = max(max(p[::2]) for p in poly)
y0 = min(min(p[1::2]) for p in poly)
y1 = max(max(p[1::2]) for p in poly)
box_from_poly = [x0, y0, x1, y1]
return box_from_poly
def xyxy_to_xywh(xyxy_box):
xmin, ymin, xmax, ymax = xyxy_box
TO_REMOVE = 1
xywh_box = (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE)
return xywh_box
def convert_coco_stuff_mat(data_dir, out_dir):
"""Convert to png and save json with path. This currently only contains
the segmentation labels for objects+stuff in cocostuff - if we need to
combine with other labels from original COCO that will be a TODO."""
sets = ['train', 'val']
categories = []
json_name = 'coco_stuff_%s.json'
ann_dict = {}
for data_set in sets:
file_list = os.path.join(data_dir, '%s.txt')
images = []
with open(file_list % data_set) as f:
for img_id, img_name in enumerate(f):
img_name = img_name.replace('coco', 'COCO').strip('\n')
image = {}
mat_file = os.path.join(
data_dir, 'annotations/%s.mat' % img_name)
data = h5py.File(mat_file, 'r')
labelMap = data.get('S')
if len(categories) == 0:
labelNames = data.get('names')
for idx, n in enumerate(labelNames):
categories.append(
{"id": idx, "name": ''.join(chr(i) for i in data[
n[0]])})
ann_dict['categories'] = categories
scipy.misc.imsave(
os.path.join(data_dir, img_name + '.png'), labelMap)
image['width'] = labelMap.shape[0]
image['height'] = labelMap.shape[1]
image['file_name'] = img_name
image['seg_file_name'] = img_name
image['id'] = img_id
images.append(image)
ann_dict['images'] = images
print("Num images: %s" % len(images))
with open(os.path.join(out_dir, json_name % data_set), 'wb') as outfile:
outfile.write(json.dumps(ann_dict))
# for Cityscapes
def getLabelID(self, instID):
if (instID < 1000):
return instID
else:
return int(instID / 1000)
def convert_cityscapes_instance_only(
data_dir, out_dir):
"""Convert from cityscapes format to COCO instance seg format - polygons"""
sets = [
'gtFine_val',
'gtFine_train',
'gtFine_test',
# 'gtCoarse_train',
# 'gtCoarse_val',
# 'gtCoarse_train_extra'
]
ann_dirs = [
'gtFine_trainvaltest/gtFine/val',
'gtFine_trainvaltest/gtFine/train',
'gtFine_trainvaltest/gtFine/test',
# 'gtCoarse/train',
# 'gtCoarse/train_extra',
# 'gtCoarse/val'
]
json_name = 'instancesonly_filtered_%s.json'
ends_in = '%s_polygons.json'
img_id = 0
ann_id = 0
cat_id = 1
category_dict = {}
category_instancesonly = [
'person',
'rider',
'car',
'truck',
'bus',
'train',
'motorcycle',
'bicycle',
]
for data_set, ann_dir in zip(sets, ann_dirs):
print('Starting %s' % data_set)
ann_dict = {}
images = []
annotations = []
ann_dir = os.path.join(data_dir, ann_dir)
for root, _, files in os.walk(ann_dir):
for filename in files:
if filename.endswith(ends_in % data_set.split('_')[0]):
if len(images) % 50 == 0:
print("Processed %s images, %s annotations" % (
len(images), len(annotations)))
json_ann = json.load(open(os.path.join(root, filename)))
image = {}
image['id'] = img_id
img_id += 1
image['width'] = json_ann['imgWidth']
image['height'] = json_ann['imgHeight']
image['file_name'] = filename[:-len(
ends_in % data_set.split('_')[0])] + 'leftImg8bit.png'
image['seg_file_name'] = filename[:-len(
ends_in % data_set.split('_')[0])] + \
'%s_instanceIds.png' % data_set.split('_')[0]
images.append(image)
fullname = os.path.join(root, image['seg_file_name'])
objects = cs.instances2dict_with_polygons(
[fullname], verbose=False)[fullname]
for object_cls in objects:
if object_cls not in category_instancesonly:
continue # skip non-instance categories
for obj in objects[object_cls]:
if obj['contours'] == []:
print('Warning: empty contours.')
continue # skip non-instance categories
len_p = [len(p) for p in obj['contours']]
if min(len_p) <= 4:
print('Warning: invalid contours.')
continue # skip non-instance categories
ann = {}
ann['id'] = ann_id
ann_id += 1
ann['image_id'] = image['id']
ann['segmentation'] = obj['contours']
if object_cls not in category_dict:
category_dict[object_cls] = cat_id
cat_id += 1
ann['category_id'] = category_dict[object_cls]
ann['iscrowd'] = 0
ann['area'] = obj['pixelCount']
xyxy_box = poly_to_box(ann['segmentation'])
xywh_box = xyxy_to_xywh(xyxy_box)
ann['bbox'] = xywh_box
annotations.append(ann)
ann_dict['images'] = images
categories = [{"id": category_dict[name], "name": name} for name in
category_dict]
ann_dict['categories'] = categories
ann_dict['annotations'] = annotations
print("Num categories: %s" % len(categories))
print("Num images: %s" % len(images))
print("Num annotations: %s" % len(annotations))
with open(os.path.join(out_dir, json_name % data_set), 'w') as outfile:
outfile.write(json.dumps(ann_dict))
if __name__ == '__main__':
args = parse_args()
if args.dataset == "cityscapes_instance_only":
convert_cityscapes_instance_only(args.datadir, args.outdir)
elif args.dataset == "cocostuff":
convert_coco_stuff_mat(args.datadir, args.outdir)
else:
print("Dataset not supported: %s" % args.dataset)