def download_ojdata()

in fclib/fclib/dataset/ojdata.py [0:0]


def download_ojdata(dest_dir="."):
    """Download orange juice dataset from the original source.

    The file at ``OJ_URL`` is fetched into ``dest_dir`` through
    ``maybe_download``. If any file named in ``DATA_FILE_LIST`` is missing
    from ``dest_dir``, the script ``SCRIPT_NAME`` (looked up under
    ``fclib/fclib/dataset`` inside the git repository) is executed with
    ``Rscript``, receiving the downloaded file path and ``dest_dir`` as its
    two arguments. Presumably that script produces the files listed in
    ``DATA_FILE_LIST`` -- confirm against the script itself.

    Args:
        dest_dir (str): Directory path for the downloaded file

    Returns:
        str: Path of the downloaded file, as returned by ``maybe_download``.

    Raises:
        RuntimeError: If the R script exits with a non-zero return code.
        OSError: If the ``Rscript`` executable cannot be launched
            (e.g. R is not installed or not on ``PATH``).
    """
    rda_path = maybe_download(OJ_URL, dest_directory=dest_dir)

    # Skip the (slow) R extraction step when every expected file is present.
    if all(os.path.exists(os.path.join(dest_dir, f)) for f in DATA_FILE_LIST):
        print("Data already exists at the specified location.")
        return rda_path

    # Call data loading script
    repo_path = git_repo_path()
    script_path = os.path.join(repo_path, "fclib", "fclib", "dataset", SCRIPT_NAME)

    print(f"Destination directory: {dest_dir}")
    # No check=True here: the return code is inspected manually below so the
    # error message can carry the script's stderr.
    output = subprocess.run(
        ["Rscript", script_path, rda_path, dest_dir], stderr=subprocess.PIPE, stdout=subprocess.PIPE
    )

    # The pipes yield bytes; decode so logs and errors are readable text
    # rather than a b'...' repr with escaped newlines.
    print(output.stdout.decode(errors="replace"))
    if output.returncode != 0:
        stderr_text = output.stderr.decode(errors="replace")
        raise RuntimeError(f"Subprocess failed - {stderr_text}")

    return rda_path