in scripts/data_preparation/prepare_vistas.py [0:0]
def main(args):
    """Convert the Vistas dataset under ``args.root_dir`` into ``args.out_dir``.

    For every split in ``_SPLITS`` this writes:

    * ``<out_dir>/lst/<split>.txt`` -- one image id per line;
    * ``<out_dir>/coco/<split>.json`` -- a COCO-detection-style dictionary
      with ``info``, ``images``, ``categories`` and ``annotations`` keys.

    After all splits are processed, ``<out_dir>/metadata.bin`` is written
    with the per-image meta-data collected from every split plus the category
    names, palette and original ids, indexed by the mapped category id.

    Args:
        args: Object exposing ``root_dir`` (input dataset directory) and
            ``out_dir`` (output directory) attributes.
    """
    print("Loading Vistas from", args.root_dir)

    # Process meta-data
    categories, version = _load_metadata(args.root_dir)
    cat_id_mvd_to_iss, _, num_stuff, num_thing = _cat_id_maps(categories)

    # Prepare directories
    lst_dir = path.join(args.out_dir, "lst")
    _ensure_dir(lst_dir)
    coco_dir = path.join(args.out_dir, "coco")
    _ensure_dir(coco_dir)

    # The COCO category list does not depend on the split, so build it once.
    # Only categories flagged with "instances" are listed.
    coco_categories = [
        {"id": cat_id_mvd_to_iss[cat_id], "name": cat_meta["name"]}
        for cat_id, cat_meta in enumerate(categories)
        if cat_meta["instances"]
    ]

    # Number of trailing characters to strip from a file name: "." + extension
    suffix_len = 1 + len(_IMAGES_EXT)

    # Run conversion
    images = []
    for split in _SPLITS:
        print("Converting", split, "...")

        # Find all image ids in the split. glob gives no ordering guarantee,
        # so sort to keep every generated file reproducible across runs.
        pattern = path.join(args.root_dir, split, _IMAGES_DIR, "*." + _IMAGES_EXT)
        img_ids = sorted(
            path.basename(img_path)[:-suffix_len] for img_path in glob.glob(pattern)
        )

        # Write the list file
        with open(path.join(lst_dir, split + ".txt"), "w") as fid:
            fid.writelines(img_id + "\n" for img_id in img_ids)

        # Convert to COCO detection format
        coco_out = {
            "info": {"version": str(version)},
            "images": [],
            "categories": list(coco_categories),
            "annotations": [],
        }

        # Process images in parallel; imap yields results in input order, so
        # the output follows the sorted image ids.
        worker = _Worker(
            categories, cat_id_mvd_to_iss, path.join(args.root_dir, split), args.out_dir
        )
        with Pool(initializer=_init_counter, initargs=(_Counter(0),)) as pool:
            results = pool.imap(worker, img_ids, 8)
            for img_meta, coco_img, coco_ann in tqdm.tqdm(results, total=len(img_ids)):
                images.append(img_meta)

                # COCO annotation
                coco_out["images"].append(coco_img)
                coco_out["annotations"] += coco_ann

        # Write COCO detection format annotation
        with open(path.join(coco_dir, split + ".json"), "w") as fid:
            json.dump(coco_out, fid)

    # Write meta-data
    print("Writing meta-data")
    num_classes = num_stuff + num_thing
    meta = {
        "images": images,
        "meta": {
            "num_stuff": num_stuff,
            "num_thing": num_thing,
            "categories": ["" for _ in range(num_classes)],
            "palette": [[0, 0, 0] for _ in range(num_classes)],
            "original_ids": [0 for _ in range(num_classes)],
        },
    }

    # Fill the per-class tables; categories not marked "evaluate" are skipped,
    # leaving the placeholder values above untouched.
    class_meta = meta["meta"]
    for cat_id, cat_meta in enumerate(categories):
        if not cat_meta["evaluate"]:
            continue

        mapped_id = cat_id_mvd_to_iss[cat_id]
        class_meta["categories"][mapped_id] = cat_meta["name"]
        class_meta["palette"][mapped_id] = cat_meta["color"]
        class_meta["original_ids"][mapped_id] = cat_id

    with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid:
        umsgpack.dump(meta, fid, encoding="utf-8")