in extra_scripts/datasets/create_voc_data_files.py [0:0]
def get_images_labels_info(split, args):
    """Build image paths and per-class target vectors for a VOC split.

    Args:
        split: dataset split name (e.g. "train", "val") passed through to
            ``get_data_files``.
        args: namespace with at least ``data_source_dir`` (VOC root containing
            ``JPEGImages/``) and ``generate_json`` (bool).

    Returns:
        Tuple of (img_paths, img_labels, output_dict):
            img_paths: list of absolute .jpg paths, sorted by image name.
            img_labels: list of np.int32 vectors (one entry per class), aligned
                with img_paths. Values: 1 present, 0 not present, -1 ignore.
            output_dict: {image_id: {class_name: label}} when
                ``args.generate_json`` is set, else {}.

    Raises:
        FileNotFoundError: if ``args.data_source_dir`` does not exist.
    """
    # Validate with a real exception: `assert` is stripped under `python -O`.
    if not g_pathmgr.exists(args.data_source_dir):
        raise FileNotFoundError(
            f"Data source NOT found: {args.data_source_dir}. Abort"
        )

    data_files = get_data_files(split, args)
    # Sorting the per-class annotation files fixes the class ordering; hoist
    # it since it is needed both for labels and (optionally) class names.
    sorted_files = sorted(data_files)
    num_classes = len(data_files)

    # Map image name -> label vector over all classes, initialized to -1
    # ("ignore") so classes never mentioned for an image stay ignored.
    img_labels_map = {}
    for cls_num, data_path in enumerate(sorted_files):
        with g_pathmgr.open(data_path, "r") as fopen:
            for line in fopen:
                try:
                    img_name, orig_label = line.strip().split()
                    if img_name not in img_labels_map:
                        img_labels_map[img_name] = np.full(
                            num_classes, -1, dtype=np.int32
                        )
                    orig_label = int(orig_label)
                    # Remap VOC conventions to train targets:
                    #   VOC -1 (not present) -> 0
                    #   VOC  0 (ignore)      -> -1
                    #   VOC  1 (present)     -> 1 (unchanged)
                    if orig_label == -1:
                        orig_label = 0
                    elif orig_label == 0:
                        orig_label = -1
                    img_labels_map[img_name][cls_num] = orig_label
                except Exception:
                    # Best-effort: skip malformed lines but keep the traceback
                    # in the log (lazy %-args avoid eager formatting).
                    logger.exception(
                        "Error processing: %s data_path: %s", line, data_path
                    )

    # Deterministic output order; compute the sorted key list once and reuse
    # it for both the path/label lists and the optional json dict.
    sorted_names = sorted(img_labels_map)
    img_paths = [
        f"{args.data_source_dir}/JPEGImages/{name}.jpg" for name in sorted_names
    ]
    img_labels = [img_labels_map[name] for name in sorted_names]

    output_dict = {}
    if args.generate_json:
        # Class name is the annotation filename prefix, e.g.
        # ".../aeroplane_trainval.txt" -> "aeroplane".
        cls_names = [
            item.split("/")[-1].split(".")[0].split("_")[0]
            for item in sorted_files
        ]
        for img_id in sorted_names:
            labels = img_labels_map[img_id]
            output_dict[img_id] = {
                name: int(labels[cls_idx])
                for cls_idx, name in enumerate(cls_names)
            }
    return img_paths, img_labels, output_dict