in archived/visual_object_detection/src/xml2json.py [0:0]
def XML2JSON(xmlFiles, test_ratio=None, rnd_seed=100):
    """Convert XML annotation files into ``annotations.json``.

    Each XML file is expected to have an ``annotation`` root holding
    ``filename``, ``size/height``, ``size/width`` and zero or more
    ``object`` elements, each with a ``name`` and a
    ``bndbox/{xmin,ymin,xmax,ymax}``. An object produces one annotation for
    every entry of the module-level ``categories`` whose ``"name"`` equals
    the object's name; objects matching no category are dropped.

    Image ids and annotation ids are assigned sequentially starting at 1, in
    the order the files (and their objects) are encountered. ``.jpg`` is
    appended to file names that do not already end with it.

    Output files are written to the current working directory:

    * ``test_ratio is None``: a single ``annotations.json`` with every image
      and annotation.
    * otherwise: ``annotations.json`` receives the train+val part and
      ``test_annotations.json`` the test part. Images are grouped by the
      part of ``file_name`` before the first ``_``; the ids of each group
      are shuffled and the first ``int(len(group) * test_ratio)`` of them
      go to the test part. A per-group summary is printed.

    Args:
        xmlFiles: Iterable of paths to the XML annotation files.
        test_ratio: Fraction (must be < 1.0) of each group assigned to the
            test split, or ``None`` to skip splitting.
        rnd_seed: Seed handed to ``random.seed`` before shuffling. Note that
            this reseeds the module-level random generator.

    Raises:
        AssertionError: If ``test_ratio`` is given and is not below 1.0.
    """
    images = []
    annotations = []
    next_annotation_id = 1

    for image_id, xml_path in enumerate(xmlFiles, start=1):
        with open(xml_path) as fd:
            # force_list must be a real tuple: xmltodict tests
            # ``key in force_list``, so a bare string would match tag names
            # by substring instead of by equality.
            doc = xmltodict.parse(fd.read(), force_list=('object',))
        root = doc['annotation']

        filename = str(root['filename'])
        images.append({
            'file_name': filename if filename.endswith('.jpg') else filename + '.jpg',
            'height': int(root['size']['height']),
            'width': int(root['size']['width']),
            'id': image_id,
        })

        if 'object' not in root:
            print(f"File: {xml_path} doesn't have any object")
            continue

        for obj in root['object']:
            obj_name = str(obj['name'])
            # No early exit: every category with a matching name yields its
            # own annotation.
            for category in categories:
                if obj_name != category["name"]:
                    continue
                box = obj["bndbox"]
                # NOTE(review): bbox is stored as corner coordinates
                # [xmin, ymin, xmax, ymax] - confirm that consumers of the
                # JSON expect this layout.
                annotations.append({
                    "image_id": image_id,
                    "bbox": [
                        int(box["xmin"]),
                        int(box["ymin"]),
                        int(box["xmax"]),
                        int(box["ymax"]),
                    ],
                    "category_id": category["id"],
                    "id": next_annotation_id,
                })
                next_annotation_id += 1

    if test_ratio is None:
        _write_annotation_json("annotations.json", images, annotations)
        return

    assert test_ratio < 1.0

    # Index annotations by image once so the split below needs only O(1)
    # lookups instead of rescanning the full annotation list per group.
    annotations_by_image = defaultdict(list)
    for ann in annotations:
        annotations_by_image[ann['image_id']].append(ann)

    # Group images by the file-name prefix before the first underscore.
    images_by_group = defaultdict(list)
    for img in images:
        images_by_group[img['file_name'].split('_')[0]].append(img)

    print('\ncategory\tnum of images')
    print('-' * 20)
    random.seed(rnd_seed)

    train_val_images = []
    test_images = []
    train_val_annotations = []
    test_annotations = []
    for group_name, group_images in images_by_group.items():
        print(f"{group_name}:\t{len(group_images)}")
        ids = [img['id'] for img in group_images]
        random.shuffle(ids)
        test_ids = set(ids[:int(len(ids) * test_ratio)])

        # Walk the group in its original order so the images and
        # annotations inside each split keep their input ordering.
        for img in group_images:
            if img['id'] in test_ids:
                split_images, split_annotations = test_images, test_annotations
            else:
                split_images, split_annotations = train_val_images, train_val_annotations
            split_images.append(img)
            split_annotations.extend(annotations_by_image.get(img['id'], ()))
    print('-' * 20)

    print(f"\ntrain_val:\t{len(train_val_images)}")
    _write_annotation_json("annotations.json", train_val_images, train_val_annotations)

    print(f"test:\t{len(test_images)}")
    _write_annotation_json("test_annotations.json", test_images, test_annotations)


def _write_annotation_json(path, images, annotations):
    """Serialize ``images`` and ``annotations`` as one JSON object to ``path``."""
    payload = json.dumps({"images": images, "annotations": annotations})
    with open(path, "w") as f:
        f.write(payload)