in src/huggingface_hub/repository.py [0:0]
def clone_from(self, repo_url: str, token: Union[bool, str, None] = None):
    """
    Clone from a remote into `self.local_dir`.

    If the local folder is empty, the repository is cloned into it. If the
    folder is not empty, it must be the root of a git repository that is
    already a clone of `repo_url`; in that case nothing is cloned and a
    warning suggests pulling the latest changes with `repo.git_pull()`.

    Args:
        repo_url (`str`):
            The URL from which to clone the repository. When it contains the
            Hub endpoint (`self.client.endpoint`), or is not an http(s) URL
            and has at most two `/`-separated parts, it is parsed with
            `repo_type_and_id_from_hf_id` and rebuilt on top of the Hub
            endpoint. Any other URL is passed to git unchanged.
        token (`Union[str, bool]`, *optional*):
            Whether to use the authentication token. It can be:
             - a string which is the token itself
             - `False`, which would not use the authentication token
             - `True` or `None`, which would use the value of
               `self.huggingface_token`.

    <Tip>

    Raises the following error:

        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if an organization token (starts with "api_org") is passed. You must use
          your own personal access token (see https://hf.co/settings/tokens).

        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
          if you are trying to clone the repository in a non-empty folder, or if the
          `git` operations raise errors.

    </Tip>
    """
    # Normalize `token` to either a string or `None`.
    if isinstance(token, str):
        pass  # str -> use it
    elif token is False:
        token = None  # `False` -> explicit no token
    else:
        token = self.huggingface_token  # `None` or `True` -> use default

    if token is not None and token.startswith("api_org"):
        raise ValueError(
            "You must use your personal access token, not an Organization token"
            " (see https://hf.co/settings/tokens)."
        )

    hub_url = self.client.endpoint
    if hub_url in repo_url or ("http" not in repo_url and len(repo_url.split("/")) <= 2):
        repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(repo_url, hub_url=hub_url)
        repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name

        if repo_type is not None:
            self._repo_type = repo_type

        # Rebuild the URL as: endpoint + "/" + optional repo-type prefix + repo id.
        repo_url = hub_url + "/"

        if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
            repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]

        if token is not None:
            # Add token in git url when provided
            scheme = urlparse(repo_url).scheme
            repo_url = repo_url.replace(f"{scheme}://", f"{scheme}://user:{token}@")

        repo_url += repo_id

    # For error messages, it's cleaner to show the repo url without the token.
    # The same pattern is reused below for the local remote so that credentials
    # are stripped for both `http` and `https` URLs.
    credentials_pattern = r"(https?)://.*@"
    clean_repo_url = re.sub(credentials_pattern, r"\1://", repo_url)

    try:
        run_subprocess("git lfs install", self.local_dir)

        # checks if repository is initialized in an empty folder or in one with files
        if not os.listdir(self.local_dir):
            logger.warning(f"Cloning {clean_repo_url} into local empty directory.")

            with _lfs_log_progress():
                env = os.environ.copy()

                if self.skip_lfs_files:
                    env.update({"GIT_LFS_SKIP_SMUDGE": "1"})

                run_subprocess(
                    # 'git lfs clone' is deprecated (will display a warning in the terminal)
                    # but we still use it as it provides a nicer UX when downloading large
                    # files (shows progress).
                    f"{'git clone' if self.skip_lfs_files else 'git lfs clone'} {repo_url} .",
                    self.local_dir,
                    env=env,
                )
        elif not is_git_repo(self.local_dir):
            # The folder has content but is not the root of a git repository.
            raise EnvironmentError(
                "Tried to clone a repository in a non-empty folder that isn't"
                f" a git repository ('{self.local_dir}'). If you really want to"
                f" do this, do it manually:\n cd {self.local_dir} && git init"
                " && git remote add origin && git pull origin main\n or clone"
                " repo to a new folder and move your existing files there"
                " afterwards."
            )
        elif is_local_clone(self.local_dir, repo_url):
            logger.warning(
                f"{self.local_dir} is already a clone of {clean_repo_url}."
                " Make sure you pull the latest changes with"
                " `repo.git_pull()`."
            )
        else:
            # `check=False`: a missing `origin` remote must not raise here, we only
            # use the output to enrich the error message when it is available.
            output = run_subprocess("git remote get-url origin", self.local_dir, check=False)

            error_msg = (
                f"Tried to clone {clean_repo_url} in an unrelated git"
                " repository.\nIf you believe this is an error, please add"
                f" a remote with the following URL: {clean_repo_url}."
            )
            if output.returncode == 0:
                # Never echo credentials embedded in the local remote URL.
                clean_local_remote_url = re.sub(credentials_pattern, r"\1://", output.stdout)
                error_msg += f"\nLocal path has its origin defined as: {clean_local_remote_url}"
            raise EnvironmentError(error_msg)

    except subprocess.CalledProcessError as exc:
        # Surface git's stderr while keeping the original exception as the cause.
        raise EnvironmentError(exc.stderr) from exc