in src/autotrain/preprocessor/vision.py [0:0]
def __post_init__(self):
    """Validate the on-disk layout of the training and validation data.

    ``self.train_data`` must be an existing directory that contains at
    least two subfolders. Every subfolder must hold at least five entries
    whose names end with one of ``ALLOWED_EXTENSIONS`` (a module-level
    constant defined outside this method), no other entries, and no
    nested directories. When ``self.valid_data`` is truthy it must satisfy
    the same rules, and its subfolder names must be identical to those of
    ``self.train_data``.

    Raises:
        ValueError: If any of the rules above is violated.
    """

    def list_subfolders(root):
        """Return the paths of the directories located directly in ``root``."""
        if not os.path.exists(root):
            raise ValueError(f"{root} does not exist.")
        # An existing regular file would otherwise make os.scandir raise
        # NotADirectoryError instead of the ValueError used for every other
        # validation failure in this method.
        if not os.path.isdir(root):
            raise ValueError(f"{root} is not a directory.")
        with os.scandir(root) as entries:
            return [entry.path for entry in entries if entry.is_dir()]

    def check_subfolders(root, subfolders):
        """Apply the count and content rules to the subfolders of ``root``."""
        if len(subfolders) < 2:
            raise ValueError(f"{root} should contain at least 2 subfolders.")
        for subfolder in subfolders:
            # Scan once so that all three checks below look at the same
            # snapshot of the directory.
            with os.scandir(subfolder) as entries:
                contents = list(entries)
            image_files = [entry.name for entry in contents if entry.name.endswith(ALLOWED_EXTENSIONS)]
            if len(image_files) < 5:
                raise ValueError(f"{subfolder} should contain at least 5 jpeg, png or jpg files.")
            # Anything whose name lacks an allowed extension is rejected.
            if len(image_files) != len(contents):
                raise ValueError(f"{subfolder} should not contain any other files except image files.")
            # NOTE: an ordinarily named nested directory already fails the
            # check above; this one only fires for a directory whose name
            # itself ends with an allowed extension.
            if any(entry.is_dir() for entry in contents):
                raise ValueError(f"{subfolder} should not contain any subfolders.")

    train_subfolders = list_subfolders(self.train_data)
    check_subfolders(self.train_data, train_subfolders)

    if self.valid_data:
        valid_subfolders = list_subfolders(self.valid_data)
        # The names are compared before the per-folder rules so that a
        # mismatch between the two splits is the first thing reported.
        train_names = {os.path.basename(path) for path in train_subfolders}
        valid_names = {os.path.basename(path) for path in valid_subfolders}
        if train_names != valid_names:
            raise ValueError(f"{self.valid_data} should have the same subfolders as {self.train_data}.")
        check_subfolders(self.valid_data, valid_subfolders)