def __post_init__()

in src/autotrain/preprocessor/vision.py [0:0]


    def __post_init__(self):
        """Validate the folder layout of the training and validation data.

        ``self.train_data`` must exist and hold at least two subfolders, each
        containing at least five files whose names end with one of
        ``ALLOWED_EXTENSIONS`` and nothing else (no other files, no nested
        folders). When ``self.valid_data`` is set, it must exist, have
        subfolders with exactly the same names as the training data, and
        satisfy the same per-subfolder rules.

        Raises:
            ValueError: If any of the layout requirements above is violated.
        """
        train_subfolders = self._scan_subfolders(self.train_data)
        self._check_image_subfolders(self.train_data, train_subfolders)

        if self.valid_data:
            valid_subfolders = self._scan_subfolders(self.valid_data)

            # The name comparison runs before the per-subfolder checks so a
            # mismatch between the two splits is reported first.
            train_names = {os.path.basename(path) for path in train_subfolders}
            valid_names = {os.path.basename(path) for path in valid_subfolders}
            if train_names != valid_names:
                raise ValueError(f"{self.valid_data} should have the same subfolders as {self.train_data}.")

            self._check_image_subfolders(self.valid_data, valid_subfolders)

    @staticmethod
    def _scan_subfolders(data_path):
        """Return the paths of the directories directly inside ``data_path``.

        Raises:
            ValueError: If ``data_path`` does not exist.
        """
        if not os.path.exists(data_path):
            raise ValueError(f"{data_path} does not exist.")

        with os.scandir(data_path) as entries:
            return [entry.path for entry in entries if entry.is_dir()]

    @staticmethod
    def _check_image_subfolders(data_path, subfolders):
        """Check that ``subfolders`` of ``data_path`` hold only enough image files.

        Args:
            data_path: Parent directory, used only in the error message.
            subfolders: Paths of the directories found directly in ``data_path``.

        Raises:
            ValueError: If there are fewer than two subfolders, or a subfolder
                has fewer than five files matching ``ALLOWED_EXTENSIONS``,
                contains any non-matching entry, or contains a directory.
        """
        if len(subfolders) < 2:
            raise ValueError(f"{data_path} should contain at least 2 subfolders.")

        for subfolder in subfolders:
            # List the folder once so all three checks see the same snapshot.
            with os.scandir(subfolder) as entries:
                contents = list(entries)

            image_count = sum(1 for entry in contents if entry.name.endswith(ALLOWED_EXTENSIONS))
            if image_count < 5:
                raise ValueError(f"{subfolder} should contain at least 5 jpeg, png or jpg files.")

            # Any entry whose name lacks an allowed extension is rejected.
            if image_count != len(contents):
                raise ValueError(f"{subfolder} should not contain any other files except image files.")

            # Still needed: a directory whose name ends with an allowed
            # extension passes the two checks above.
            if any(entry.is_dir() for entry in contents):
                raise ValueError(f"{subfolder} should not contain any subfolders.")