src/autotrain/dataset.py [209:254]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if not self.valid_data and self.percent_valid is None:
            self.percent_valid = 0.2
        elif self.valid_data and self.percent_valid is not None:
            raise ValueError("You can only specify one of valid_data or percent_valid")
        elif self.valid_data:
            self.percent_valid = 0.0

    def prepare(self):
        """Stage the training (and optional validation) data in local directories.

        Two input forms are handled:

        * ``self.train_data`` is not a ``str``: it is read as a seekable
          file-like object holding a zip archive. The archive is extracted
          into a fresh ``<cache>/autotrain/<uuid4>`` directory, where
          ``<cache>`` is ``$HF_HOME`` or, if that is unset or empty,
          ``~/.cache/huggingface``. ``self.valid_data``, when truthy, is
          extracted the same way into its own directory.
        * ``self.train_data`` is a ``str``: it is used unchanged as
          ``train_dir``, and ``self.valid_data`` (when truthy) as ``valid_dir``.

        ``valid_dir`` stays ``None`` when no validation data is supplied.

        NOTE(review): this describes only how ``train_dir`` / ``valid_dir``
        are obtained; confirm how they are consumed further down.
        """
        valid_dir = None
        if not isinstance(self.train_data, str):
            # Resolve the cache root: $HF_HOME wins, otherwise fall back to
            # ~/.cache/huggingface.
            cache_dir = os.environ.get("HF_HOME")
            if not cache_dir:
                cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")

            # A random UUID gives every call its own extraction directory.
            random_uuid = uuid.uuid4()
            train_dir = os.path.join(cache_dir, "autotrain", str(random_uuid))
            os.makedirs(train_dir, exist_ok=True)
            # Rewind first so the whole upload is read even if it was
            # consumed earlier; the full archive is buffered in memory.
            self.train_data.seek(0)
            content = self.train_data.read()
            bytes_io = io.BytesIO(content)

            # NOTE(review): the ZipFile is not closed in this part of the
            # method; a ``with`` block would release it deterministically.
            zip_ref = zipfile.ZipFile(bytes_io, "r")
            zip_ref.extractall(train_dir)
            # Remove the __MACOSX directory (the metadata folder that macOS
            # adds to zip archives).
            # NOTE(review): the path is interpolated unquoted into a shell
            # command, so a cache path containing whitespace or shell
            # metacharacters is mis-parsed, and ``rm`` must exist on the host.
            # ``shutil.rmtree`` would avoid both problems.
            macosx_dir = os.path.join(train_dir, "__MACOSX")
            if os.path.exists(macosx_dir):
                os.system(f"rm -rf {macosx_dir}")
            # Helper defined elsewhere; presumably deletes extracted files
            # that are not images -- confirm against its definition.
            remove_non_image_files(train_dir)
            if self.valid_data:
                # Same procedure for the validation archive, in a separate
                # UUID-named directory under the same cache root.
                random_uuid = uuid.uuid4()
                valid_dir = os.path.join(cache_dir, "autotrain", str(random_uuid))
                os.makedirs(valid_dir, exist_ok=True)
                self.valid_data.seek(0)
                content = self.valid_data.read()
                bytes_io = io.BytesIO(content)
                zip_ref = zipfile.ZipFile(bytes_io, "r")
                zip_ref.extractall(valid_dir)
                # Remove the __MACOSX directory (same shell-quoting caveat
                # as for the training archive).
                macosx_dir = os.path.join(valid_dir, "__MACOSX")
                if os.path.exists(macosx_dir):
                    os.system(f"rm -rf {macosx_dir}")
                remove_non_image_files(valid_dir)
        else:
            # String inputs are taken as-is; nothing is extracted or cleaned.
            train_dir = self.train_data
            if self.valid_data:
                valid_dir = self.valid_data
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/autotrain/dataset.py [420:465]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if not self.valid_data and self.percent_valid is None:
            self.percent_valid = 0.2
        elif self.valid_data and self.percent_valid is not None:
            raise ValueError("You can only specify one of valid_data or percent_valid")
        elif self.valid_data:
            self.percent_valid = 0.0

    def prepare(self):
        """Stage the training (and optional validation) data in local directories.

        If ``self.train_data`` is a ``str`` it is used unchanged as
        ``train_dir`` (and ``self.valid_data``, when truthy, as ``valid_dir``).

        Otherwise ``self.train_data`` is read as a seekable file-like object
        containing a zip archive and extracted into a new
        ``<cache>/autotrain/<uuid4>`` directory, with ``<cache>`` taken from
        ``$HF_HOME`` or defaulting to ``~/.cache/huggingface``. A truthy
        ``self.valid_data`` is extracted the same way into its own directory.

        ``valid_dir`` remains ``None`` when there is no validation data.

        NOTE(review): only the derivation of ``train_dir`` / ``valid_dir`` is
        documented here; verify how they are used afterwards.
        """
        valid_dir = None
        if not isinstance(self.train_data, str):
            # Cache root: $HF_HOME if set and non-empty, else the default
            # Hugging Face cache location under the user's home directory.
            cache_dir = os.environ.get("HF_HOME")
            if not cache_dir:
                cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")

            # Unique directory per call, so separate extractions never mix.
            random_uuid = uuid.uuid4()
            train_dir = os.path.join(cache_dir, "autotrain", str(random_uuid))
            os.makedirs(train_dir, exist_ok=True)
            # Rewind before reading; the archive is loaded fully into memory.
            self.train_data.seek(0)
            content = self.train_data.read()
            bytes_io = io.BytesIO(content)

            # NOTE(review): zip_ref is never closed in this part of the
            # method; consider ``with zipfile.ZipFile(...) as zip_ref:``.
            zip_ref = zipfile.ZipFile(bytes_io, "r")
            zip_ref.extractall(train_dir)
            # Remove the __MACOSX directory (metadata folder added by macOS
            # when creating zip archives).
            # NOTE(review): the shell command embeds the path without
            # quoting, which breaks on paths with spaces or shell
            # metacharacters and requires ``rm`` on the host;
            # ``shutil.rmtree`` is the portable alternative.
            macosx_dir = os.path.join(train_dir, "__MACOSX")
            if os.path.exists(macosx_dir):
                os.system(f"rm -rf {macosx_dir}")
            # External helper; presumably strips non-image files from the
            # extracted tree -- confirm against its definition.
            remove_non_image_files(train_dir)
            if self.valid_data:
                # Repeat the extraction and clean-up for the validation archive.
                random_uuid = uuid.uuid4()
                valid_dir = os.path.join(cache_dir, "autotrain", str(random_uuid))
                os.makedirs(valid_dir, exist_ok=True)
                self.valid_data.seek(0)
                content = self.valid_data.read()
                bytes_io = io.BytesIO(content)
                zip_ref = zipfile.ZipFile(bytes_io, "r")
                zip_ref.extractall(valid_dir)
                # Remove the __MACOSX directory (same shell-quoting caveat
                # as above).
                macosx_dir = os.path.join(valid_dir, "__MACOSX")
                if os.path.exists(macosx_dir):
                    os.system(f"rm -rf {macosx_dir}")
                remove_non_image_files(valid_dir)
        else:
            # String inputs are passed through untouched.
            train_dir = self.train_data
            if self.valid_data:
                valid_dir = self.valid_data
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



