def _get_list_of_files()

in src/smspark/cli.py [0:0]


def _get_list_of_files(path_str: str) -> str:
    """Expand a comma-delimited list of paths into a comma-delimited list of files.

    Certain smspark-submit options (--jars, --files, --py-files) may be a path
    to a directory containing jars, files, or python files, whereas spark-submit
    expects a comma-delimited list of absolute file paths.

    For example, given "/path/to/jars/dir", this function returns
    "/path/to/jars/dir/file1.jar,/path/to/jars/dir/file2.jar", which is a valid
    value for spark-submit's "--jars" option.

    Each comma-separated entry is handled independently:

    * "s3://" and "s3a://" entries are passed through unchanged.
    * Local entries (no scheme, or a "file:" URI) must be absolute and must exist.
      A regular file is returned as its resolved absolute path. A directory is
      replaced by the resolved paths of the regular files directly inside it, in
      sorted order; subdirectories are not traversed.
    * Entries with any other scheme are omitted from the result.

    Args:
        path_str: a non-empty, comma-delimited string of paths.

    Returns:
        A comma-delimited string of the expanded paths.

    Raises:
        InputError: if path_str is empty, or if a local entry is not absolute,
            does not exist, is a directory containing no regular files, or is
            neither a regular file nor a directory.
    """
    if not path_str:
        raise InputError(ValueError(f"path {path_str} must not be empty"))

    expanded_paths = []
    for path in path_str.split(","):
        # Parse each entry on its own: parsing the whole comma-delimited string
        # would classify every entry by the scheme of the first one.
        result = urlparse(path)
        if result.scheme in ("s3", "s3a"):
            expanded_paths.append(path)
        elif result.scheme == "file" or not result.scheme:
            # For "file:" URIs only the path component names the local file; the
            # raw URI string would never be an absolute filesystem path.
            local_path = result.path if result.scheme == "file" else path
            file_path = pathlib.Path(local_path)
            if not file_path.is_absolute():
                raise InputError(ValueError(f"file path {file_path} must be an absolute path to a file or directory"))

            file_path = file_path.resolve()

            # In the typical case, file_path points to a directory containing files.
            if not file_path.exists():
                raise InputError(ValueError(f"file path {file_path} does not exist"))

            if file_path.is_dir():
                # Sorted so the resulting option value is deterministic; iterdir()
                # yields entries in arbitrary order.
                files = sorted(str(f.resolve()) for f in file_path.iterdir() if f.is_file())
                if not files:
                    raise InputError(ValueError(f"Found zero files in {file_path}"))
                expanded_paths.extend(files)
            elif file_path.is_file():
                expanded_paths.append(str(file_path))
            else:
                raise InputError(ValueError(f"file at {file_path} is not a regular file or directory"))
        # NOTE(review): entries with any other scheme (e.g. hdfs://, http://) fall
        # through here and are silently left out of the result -- confirm intended.

    return ",".join(expanded_paths)