in src/datasets/folder.py
def __init__(self, root, loader, extensions, transform=None, target_transform=None):
    start = time.time()
    # Look up the pickle cache file registered for this dataset root.
    path_cache = CACHE_DATASET[root]
    if not os.path.isfile(path_cache):
        print("Images cache not found in %s, parsing dataset..." % path_cache,
              file=sys.stderr)
        classes, class_to_idx = self._find_classes(root)
        samples = make_dataset(root, class_to_idx, extensions)
        print("Parsing image folder took %.2f seconds." % (time.time() - start),
              file=sys.stderr)
        # Cache the parsed file list so later runs can skip the directory walk.
        with open(path_cache, "wb") as f:
            pickle.dump((classes, class_to_idx, samples), f)
    else:
        with open(path_cache, "rb") as f:
            classes, class_to_idx, samples = pickle.load(f)
        print("Loading cached images took %.2f seconds." % (time.time() - start),
              file=sys.stderr)
    print("Dataset contains %i images." % len(samples), file=sys.stderr)
    if len(samples) == 0:
        raise RuntimeError("Found 0 files in subfolders of: " + root + "\n"
                           "Supported extensions are: " + ",".join(extensions))
    self.root = root
    self.loader = loader
    self.extensions = extensions
    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    # Integer class labels as a numpy array, aligned index-for-index with samples.
    self.targets = np.array([s[1] for s in samples])
    self.transform = transform
    self.target_transform = target_transform
    # Samples are grouped by class contiguously: labels must be non-decreasing,
    # increase by at most 1 between neighbours, and cover every class index.
    assert np.all(0 <= self.targets[1:] - self.targets[:-1])
    assert np.all(self.targets[1:] - self.targets[:-1] <= 1)
    assert np.sum(self.targets[1:] - self.targets[:-1]) == max(self.targets)
    # Positions where the label changes, i.e. where each class block begins.
    cl_positions = np.nonzero(self.targets[1:] - self.targets[:-1])[0] + 1
    cl_positions = np.insert(cl_positions, 0, 0)
    cl_positions = np.append(cl_positions, len(self.targets))
    # Map each class index to the contiguous range of sample positions it occupies.
    self.class2position = {i: np.arange(cl_positions[i], cl_positions[i + 1])
                           for i in range(len(cl_positions) - 1)}
    assert all(all(self.targets[v] == i for v in self.class2position[i])
               for i in range(max(self.targets) + 1))
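
For reference, below is a minimal standalone sketch of the grouping logic above applied to a toy label array; the values are purely illustrative and only numpy is needed. (The method itself additionally assumes module-level imports of time, os, sys, pickle and numpy as np, and that CACHE_DATASET, presumably a mapping from dataset roots to cache file paths, and make_dataset are defined elsewhere in the file.)

import numpy as np

# Toy label array: samples sorted by class, as the assertions above require.
targets = np.array([0, 0, 0, 1, 1, 2, 2, 2, 2])

# Positions where the label increases by one mark the start of each class block.
cl_positions = np.nonzero(targets[1:] - targets[:-1])[0] + 1
cl_positions = np.insert(cl_positions, 0, 0)
cl_positions = np.append(cl_positions, len(targets))

# Same construction as self.class2position: class index -> sample positions.
class2position = {i: np.arange(cl_positions[i], cl_positions[i + 1])
                  for i in range(len(cl_positions) - 1)}

print(class2position)
# {0: array([0, 1, 2]), 1: array([3, 4]), 2: array([5, 6, 7, 8])}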