in setup_datasets.py [0:0]
def download_datasets(data_path):
    """Download the CelebA, Waterbirds, MultiNLI and CivilComments files.

    Each dataset is placed in its own sub-directory of ``data_path``. The
    directories are created on demand (existing ones are reused) and every
    file is handed to ``download_and_extract`` together with its destination
    path.

    Args:
        data_path: Root directory under which the per-dataset folders
            ``celeba``, ``waterbirds``, ``multinli`` and ``civilcomments``
            are created.
    """

    def ensure_dir(path):
        # Create the directory if it is missing and hand the path back.
        os.makedirs(path, exist_ok=True)
        return path

    def fetch(url, folder, filename, **options):
        # Forward keyword options only when given, so calls that omit
        # ``remove`` keep relying on download_and_extract's own default.
        download_and_extract(url, os.path.join(folder, filename), **options)

    ensure_dir(data_path)

    # --- CelebA: image archive plus two plain-text annotation files ---
    logging.info("Downloading CelebA")
    celeba_root = ensure_dir(os.path.join(data_path, "celeba"))
    # The .txt files are fetched with remove=False; presumably this keeps
    # the downloaded file in place -- confirm against download_and_extract.
    celeba_files = (
        (
            "https://drive.google.com/uc?id=1mb1R6dXfWbvk3DnlWOBO8pDeoBKOcLE6",
            "img_align_celeba.zip",
            {},
        ),
        (
            "https://drive.google.com/uc?id=1acn0-nE4W7Wa17sIkKB0GtfW4Z41CMFB",
            "list_eval_partition.txt",
            {"remove": False},
        ),
        (
            "https://drive.google.com/uc?id=11um21kRUuaUNoMl59TCe2fb01FNjqNms",
            "list_attr_celeba.txt",
            {"remove": False},
        ),
    )
    for url, filename, options in celeba_files:
        fetch(url, celeba_root, filename, **options)

    # --- Waterbirds: a single tarball ---
    logging.info("Downloading Waterbirds")
    waterbirds_root = ensure_dir(os.path.join(data_path, "waterbirds"))
    fetch(
        "https://nlp.stanford.edu/data/dro/waterbird_complete95_forest2water2.tar.gz",
        waterbirds_root,
        "waterbirds.tar.gz",
    )

    # --- MultiNLI: feature tarball and metadata CSV in separate folders ---
    logging.info("Downloading MultiNLI")
    nli_root = os.path.join(data_path, "multinli")
    features_root = ensure_dir(os.path.join(nli_root, "glue_data/MNLI/"))
    fetch(
        "https://nlp.stanford.edu/data/dro/multinli_bert_features.tar.gz",
        features_root,
        "multinli_bert_features.tar.gz",
    )
    metadata_root = ensure_dir(os.path.join(nli_root, "data"))
    fetch(
        "https://raw.githubusercontent.com/kohpangwei/group_DRO/master/dataset_metadata/multinli/metadata_random.csv",
        metadata_root,
        "metadata_random.csv",
        remove=False,
    )

    # --- CivilComments: a single tarball ---
    logging.info("Downloading CivilComments")
    civil_root = ensure_dir(os.path.join(data_path, "civilcomments"))
    fetch(
        "https://worksheets.codalab.org/rest/bundles/0x8cd3de0634154aeaad2ee6eb96723c6e/contents/blob/",
        civil_root,
        "civilcomments.tar.gz",
    )