in datasets.py [0:0]
def make_subsampled_dataset(
        directory, class_to_idx, extensions=None, is_valid_file=None,
        sampling_ratio=1., nb_classes=None):
    """Collect ``(path, class_index)`` samples, keeping a fraction of each class.

    Each key of ``class_to_idx`` is treated as a sub-directory of
    ``directory``. Every such directory is walked recursively (following
    symlinks), files accepted by the validity predicate are gathered in
    sorted order, and the first ``int(n_valid * sampling_ratio)`` of them are
    kept for that class.

    Args:
        directory: Root directory; ``~`` is expanded.
        class_to_idx: Mapping from class (sub-directory) name to the class
            index stored in each returned sample.
        extensions: Allowed extensions, forwarded to
            ``has_file_allowed_extension``. Exactly one of ``extensions``
            and ``is_valid_file`` must be given.
        is_valid_file: Callable taking a file path and returning whether the
            file should be considered a sample.
        sampling_ratio: Non-negative fraction of each class's valid files to
            keep. Values of 1 or more keep every valid file.
        nb_classes: If not ``None``, only the first ``nb_classes`` class
            names (in sorted order) are considered. Names whose directory is
            missing still count towards this limit.

    Returns:
        A list of ``(path, class_index)`` tuples, ordered by class name, then
        by walked directory, then by file name.

    Raises:
        ValueError: If ``extensions`` and ``is_valid_file`` are both ``None``
            or both provided, or if ``sampling_ratio`` is negative.
    """
    instances = []
    directory = os.path.expanduser(directory)

    both_none = extensions is None and is_valid_file is None
    both_something = extensions is not None and is_valid_file is not None
    if both_none or both_something:
        raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time")
    if sampling_ratio < 0:
        # A negative ratio previously disabled the cap silently; fail loudly.
        raise ValueError(
            f"sampling_ratio must be non-negative, got {sampling_ratio!r}")

    if extensions is not None:
        def is_valid_file(x: str) -> bool:
            return has_file_allowed_extension(x, cast(Tuple[str, ...], extensions))
    is_valid_file = cast(Callable[[str], bool], is_valid_file)

    for i, target_class in enumerate(sorted(class_to_idx)):
        if nb_classes is not None and i >= nb_classes:
            break
        class_index = class_to_idx[target_class]
        target_dir = os.path.join(directory, target_class)
        if not os.path.isdir(target_dir):
            continue

        # Gather every valid file first so the ratio applies to the number of
        # usable samples, not to the raw count of top-level directory entries
        # (which also includes sub-directories and rejected files).
        valid_paths = []
        for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
            for fname in sorted(fnames):
                path = os.path.join(root, fname)
                if is_valid_file(path):
                    valid_paths.append(path)

        num_imgs = int(len(valid_paths) * sampling_ratio)
        # Sorted traversal makes the kept subset a deterministic prefix.
        instances.extend(
            (path, class_index) for path in valid_paths[:num_imgs])
    return instances