in archive/data_management/tfrecords/make_tfrecords_from_vott.py [0:0]
def __init__(self, root, class_names=None, store_empty_images=False):
    """Load all images in ``root`` together with their bounding-box annotations.

    For every image ``<stem>.<ext>`` found directly in ``root`` two sibling
    files are read: ``<stem>.bboxes.labels.tsv`` (one class label per line)
    and ``<stem>.bboxes.tsv`` (one box per line, stored as
    ``x_min y_min x_max y_max`` with the origin in the top-left corner).

    After construction the instance exposes:

    * ``impaths``     - sorted image file names, relative to ``root``
    * ``image_ids``   - the original index of each kept image in the
      sorted file list (no longer contiguous once images were dropped)
    * ``bboxes``      - per image, a list of ``[ymin, xmin, ymax, xmax]``
    * ``labels``      - per image, a list of indices into ``class_names``
    * ``class_names`` - label strings; ``classes`` is ``range(len(...))``

    Args:
        root: Folder containing the images and annotation files.
        class_names: Optional list of already known class names. Labels
            that are not yet contained are appended to this very list
            (it is extended in place). Defaults to a new empty list.
        store_empty_images: If false, images that end up without any
            valid bounding box are removed from the dataset.

    Raises:
        OSError: If an annotation file of an image cannot be opened.
        AssertionError: If the number of labels and boxes of an image
            differ.
    """
    # A literal ``[]`` default would be one list shared (and grown) by
    # every instance created without an explicit ``class_names``.
    if class_names is None:
        class_names = []
    self.root = root
    self.class_names = class_names
    print('Loading images from folder ' + root)

    # File names are collected relative to ``root``, hence the temporary
    # change of the working directory. ``finally`` guarantees the previous
    # directory is restored even if an annotation file is missing/broken.
    old_dir = os.getcwd()
    os.chdir(root)
    try:
        patterns = ['*.jpg', '*.JPG', '*.jpeg', '*.JPEG', '*.PNG', '*.png']
        # The set removes duplicates: where glob matching is
        # case-insensitive (Windows) '*.jpg' and '*.JPG' return the
        # same files.
        self.impaths = sorted({fi for ext in patterns for fi in glob.glob(ext)})
        print('Found {} images'.format(len(self.impaths)))
        self.image_ids = list(range(len(self.impaths)))

        # Read the boxes and labels of every image.
        self.bboxes = [[] for _ in self.image_ids]
        self.labels = [[] for _ in self.image_ids]
        empty_images = []
        for image_id, impath in enumerate(self.impaths):
            stem = os.path.splitext(impath)[0]
            with open(stem + '.bboxes.labels.tsv', 'rt') as labelfile:
                bbox_labels = labelfile.read().splitlines()
            # If needed: merging all classes
            #bbox_labels = ['Animal' for _ in bbox_labels]

            # BBox coords are stored in the format
            # x_min (of width axis) y_min (of height axis), x_max, y_max
            # Coordinate system starts in top left corner
            bbox_coords = np.loadtxt(stem + '.bboxes.tsv', dtype=np.int32)
            # A file with a single box is loaded as a 1-D array; add the
            # row axis so that it is handled like the multi-box case.
            if len(bbox_coords.shape) == 1 and bbox_coords.size > 0:
                bbox_coords = bbox_coords[None, :]
            assert len(bbox_coords) == len(bbox_labels), \
                'Number of boxes and labels differ for image ' + impath

            # Close the image file right after reading its dimensions.
            with Image.open(impath) as image:
                width, height = image.size

            for bb, label in zip(bbox_coords, bbox_labels):
                # A class is registered even if its box is rejected below.
                if label not in self.class_names:
                    self.class_names.append(label)
                inside_image = (np.all(bb >= 0)
                                and bb[0] <= width and bb[2] <= width
                                and bb[1] <= height and bb[3] <= height)
                non_degenerate = bb[0] < bb[2] and bb[1] < bb[3]
                if inside_image and non_degenerate:
                    # In this framework, we need ('ymin', 'xmin', 'ymax', 'xmax') format
                    self.bboxes[image_id].append([bb[1], bb[0], bb[3], bb[2]])
                    self.labels[image_id].append(self.class_names.index(label))

            if not self.bboxes[image_id]:
                empty_images.append(image_id)

        if not store_empty_images:
            # Delete back to front so that the remaining indices stay valid.
            for empty_image_id in reversed(empty_images):
                print("Deleting image {} as all bounding boxes are outside "
                      "of the image or no bounding boxes are provided".format(empty_image_id))
                del self.impaths[empty_image_id]
                del self.image_ids[empty_image_id]
                del self.bboxes[empty_image_id]
                del self.labels[empty_image_id]

        self.classes = list(range(len(self.class_names)))
        # print out some stats
        print("The dataset has {} images containing {} classes".format(
            len(self.image_ids),
            len(self.classes)))
    finally:
        os.chdir(old_dir)

    # To make sure we loaded the bboxes correctly:
    #self.validate_bboxes()
    print("All checks passed")