def XML2JSON()

in archived/visual_object_detection/src/xml2json.py [0:0]


def XML2JSON(xmlFiles, test_ratio=None, rnd_seed=100):
    """ Convert all xmls to annotations.json

    If the test_ratio is not None, convert to two annotations.json files,
    one for train+val, another one for test.
    """

    images = list()
    annotations = list()
    image_id = 1
    annotation_id = 1
    for file in xmlFiles:
        annotation_path = file
        image = dict()
        with open(annotation_path) as fd:
            doc = xmltodict.parse(fd.read(), force_list=('object'))
        filename = str(doc['annotation']['filename'])
        image['file_name'] = filename if filename.endswith('.jpg') else filename + '.jpg'
        image['height'] = int(doc['annotation']['size']['height'])
        image['width'] = int(doc['annotation']['size']['width'])
        image['id'] = image_id
#         print("File Name: {} and image_id {}".format(file, image_id))
        images.append(image)
        if 'object' in doc['annotation']:
            for obj in doc['annotation']['object']:
                for value in categories:
                    annotation = dict()
                    if str(obj['name']) == value["name"]:
                        annotation["image_id"] = image_id
                        xmin = int(obj["bndbox"]["xmin"])
                        ymin = int(obj["bndbox"]["ymin"])
                        xmax = int(obj["bndbox"]["xmax"])
                        ymax = int(obj["bndbox"]["ymax"])
                        annotation["bbox"] = [xmin, ymin, xmax, ymax]
                        annotation["category_id"] = value["id"]
                        annotation["id"] = annotation_id
                        annotation_id += 1
                        annotations.append(annotation)

        else:
            print("File: {} doesn't have any object".format(file))

        image_id += 1

    if test_ratio is None:
        attrDict = dict()
        attrDict["images"] = images
        attrDict["annotations"] = annotations

        jsonString = json.dumps(attrDict)
        with open("annotations.json", "w") as f:
            f.write(jsonString)
    else:
        assert test_ratio < 1.0

        # Size of each class
        category_ids = defaultdict(list)
        for img in images:
            category = img['file_name'].split('_')[0]
            category_ids[category].append(img['id'])
        print('\ncategory\tnum of images')
        print('-' * 20)

        random.seed(rnd_seed)

        train_val_images = []
        test_images = []
        train_val_annotations = []
        test_annotations = []

        for category in category_ids.keys():
            print(f"{category}:\t{len(category_ids[category])}")

            random.shuffle(category_ids[category])
            N = len(category_ids[category])
            ids = category_ids[category]

            sep = int(N * test_ratio)

            category_images = [img for img in images if img['id'] in ids[:sep]]
            test_images.extend(category_images)
            category_images = [img for img in images if img['id'] in ids[sep:]]
            train_val_images.extend(category_images)

            category_annotations = [ann for ann in annotations if ann['image_id'] in ids[:sep]]
            test_annotations.extend(category_annotations)
            category_annotations = [ann for ann in annotations if ann['image_id'] in ids[sep:]]
            train_val_annotations.extend(category_annotations)

        print('-' * 20)

        train_val_attrDict = dict()
        train_val_attrDict["images"] = train_val_images
        train_val_attrDict["annotations"] = train_val_annotations
        print(f"\ntrain_val:\t{len(train_val_images)}")

        train_val_jsonString = json.dumps(train_val_attrDict)
        with open("annotations.json", "w") as f:
            f.write(train_val_jsonString)

        test_attDict = dict()
        test_attDict["images"] = test_images
        test_attDict["annotations"] = test_annotations
        print(f"test:\t{len(test_images)}")

        test_jsonString = json.dumps(test_attDict)
        with open("test_annotations.json", "w") as f:
            f.write(test_jsonString)