s3_management/backup_conda.py (40 lines of code) (raw):

#!/usr/bin/env python3
# Downloads domain library packages from channel
# And backs them up to S3
# Do not use unless you know what you are doing

import contextlib
import hashlib
import os
import shutil
# NOTE: a bare `import urllib` does not load the `request` submodule; import
# it explicitly so `urllib.request.urlopen` does not depend on another
# library having imported it first.
import urllib.request
from typing import List, Optional

import boto3
import conda.api

S3 = boto3.resource('s3')
BUCKET = S3.Bucket('pytorch-backup')
_known_subdirs = ["linux-64", "osx-64", "osx-arm64", "win-64"]
# Files are hashed and downloaded in pieces of this size so that a package
# is never held in memory in its entirety.
_CHUNK_SIZE = 1024 * 1024


def compute_md5(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in ``_CHUNK_SIZE`` pieces rather than all at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _fetch(url: str, dest: str) -> None:
    """Stream *url* to *dest*; *dest* only appears once the download is complete.

    Data is written to ``<dest>.part`` and renamed into place at the end, so
    an interrupted transfer can never be mistaken for a finished file by the
    ``os.path.exists`` check in ``download_conda_package``.
    """
    partial = f"{dest}.part"
    try:
        with urllib.request.urlopen(url) as response, open(partial, "wb") as f:
            shutil.copyfileobj(response, f, _CHUNK_SIZE)
    except BaseException:
        # Remove the truncated file on any failure (including Ctrl-C), then
        # let the original error propagate unchanged.
        with contextlib.suppress(FileNotFoundError):
            os.remove(partial)
        raise
    os.replace(partial, dest)


def download_conda_package(package: str,
                           version: Optional[str] = None,
                           depends: Optional[str] = None,
                           channel: Optional[str] = None) -> List[str]:
    """Download every matching build of *package* into ``<subdir>/<filename>``.

    Packages are looked up with ``conda.api.SubdirData.query_all`` across
    ``_known_subdirs``. A file that already exists locally is not downloaded
    again, but it is still checked against the md5 from the package record.

    Args:
        package: Package name to query for.
        version: If given, only records whose ``version`` equals it are kept.
        depends: If given, only records for which ``depends in pkg.depends``
            holds are kept.
        channel: If given, the only channel queried; otherwise ``None`` is
            passed as ``channels`` to ``query_all``.

    Returns:
        Relative paths (``<subdir>/<filename>``) of the files whose md5
        matched. Files with a mismatching md5 are reported on stdout, deleted
        and left out of the result.
    """
    packages = conda.api.SubdirData.query_all(
        package,
        channels=[channel] if channel is not None else None,
        subdirs=_known_subdirs,
    )
    rc = []
    for pkg in packages:
        if version is not None and pkg.version != version:
            continue
        if depends is not None and depends not in pkg.depends:
            continue

        print(f"Downloading {pkg.url}...")
        os.makedirs(pkg.subdir, exist_ok=True)
        fname = f"{pkg.subdir}/{pkg.fn}"
        if not os.path.exists(fname):
            _fetch(pkg.url, fname)

        actual_md5 = compute_md5(fname)
        if actual_md5 != pkg.md5:
            print(f"md5 of {fname} is {actual_md5} does not match {pkg.md5}")
            # Drop the bad file: left in place, the existence check above
            # would skip the download on every later run and the package
            # could never be backed up.
            os.remove(fname)
            continue
        rc.append(fname)
    return rc


def upload_to_s3(prefix: str, fnames: List[str]) -> None:
    """Upload each file in *fnames* to ``BUCKET`` under ``<prefix>/<fname>``.

    Each file name is printed after its upload call returns.
    """
    for fname in fnames:
        BUCKET.upload_file(fname, f"{prefix}/{fname}")
        print(fname)


def main() -> None:
    """Back up the domain libraries that depend on ``pytorch 1.9.0``."""
    for libname in ["torchvision", "torchaudio", "torchtext"]:
        rc = download_conda_package(libname, channel="pytorch", depends="pytorch 1.9.0")
        upload_to_s3("v1.9.0-rc4/conda", rc)


if __name__ == "__main__":
    main()