in fclib/fclib/dataset/ojdata.py [0:0]
def download_ojdata(dest_dir="."):
    """Download the orange juice dataset and generate its data files.

    The file at ``OJ_URL`` is fetched into ``dest_dir`` through
    ``maybe_download``. If any file listed in ``DATA_FILE_LIST`` is missing
    from ``dest_dir``, the R script ``SCRIPT_NAME`` (looked up under
    ``fclib/fclib/dataset`` in the git repository) is run with the downloaded
    file and ``dest_dir`` as its arguments.

    Args:
        dest_dir (str): Directory path for the downloaded file

    Returns:
        str: Path of the downloaded file (the value returned by
            ``maybe_download``).

    Raises:
        Exception: If the R script exits with a non-zero return code; the
            message carries the captured stderr.
    """
    # The download helper is called unconditionally, before checking whether
    # the data files derived from it are already present.
    rda_path = maybe_download(OJ_URL, dest_directory=dest_dir)

    # Check if data files exist
    data_exists = all(os.path.exists(os.path.join(dest_dir, f)) for f in DATA_FILE_LIST)
    if data_exists:
        print("Data already exists at the specified location.")
        return rda_path

    # Call data loading script
    repo_path = git_repo_path()
    script_path = os.path.join(repo_path, "fclib", "fclib", "dataset", SCRIPT_NAME)

    print(f"Destination directory: {dest_dir}")
    # subprocess.run is used without check=True, so it never raises
    # CalledProcessError; a failing script is detected via returncode below.
    output = subprocess.run(
        ["Rscript", script_path, rda_path, dest_dir], stderr=subprocess.PIPE, stdout=subprocess.PIPE
    )
    print(output.stdout)
    if output.returncode != 0:
        raise Exception(f"Subprocess failed - {output.stderr}")

    return rda_path