def download_dataset()

in pytorch_alternatives/custom_pytorch_nlp/util/preprocessing.py [0:0]


def download_dataset():
    """Fetch the AG News archive into ``data/`` and unpack it there.

    The archive is stored at ``data/ag_news_csv.tgz``. If
    ``wait_for_file_stable`` reports a truthy result for that path, the
    existing file is reused (presumably meaning a complete earlier download
    is already present -- confirm against that helper's definition).
    Otherwise the file is copied from ``s3://fast-ai-nlp/ag_news_csv.tgz``
    with the AWS CLI using an unsigned request.

    After extraction, the ``data/__MACOSX`` folder is removed on a
    best-effort basis.

    Raises:
        FileNotFoundError: If the ``aws`` executable cannot be found.
        subprocess.CalledProcessError: If the ``aws s3 cp`` command exits
            with a non-zero status.
    """
    os.makedirs("data", exist_ok=True)
    zip_filepath = os.path.join("data", "ag_news_csv.tgz")

    if wait_for_file_stable(zip_filepath):
        print("Using previously-downloaded dataset")
    else:
        print("Downloading data...")
        # Pass an argument list (no shell) and fail loudly on a non-zero exit
        # status, rather than letting a failed download surface later as a
        # confusing tarfile error.
        subprocess.run(
            [
                "aws",
                "s3",
                "cp",
                "s3://fast-ai-nlp/ag_news_csv.tgz",
                zip_filepath,
                "--no-sign-request",
            ],
            check=True,
        )

    # The context manager closes the archive; no explicit close() is needed.
    with tarfile.open(zip_filepath, "r:gz") as tar:
        print("Unzipping...")
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: refuse absolute paths, links
            # escaping the destination, and similar unsafe members.
            tar.extractall(path="data", filter="data")
        else:
            tar.extractall(path="data")

    # Clean up the noise in the folder, don't care too much if it fails:
    shutil.rmtree(os.path.join("data", "__MACOSX"), ignore_errors=True)
    print("Saved to data/ folder")