in src/smspark/cli.py [0:0]
def _get_list_of_files(path_str: str) -> str:
    """Expand a comma-delimited list of paths into a comma-delimited list of files.

    Certain smspark-submit options (--jars, --files, --py-files) may be a path
    to a directory containing jars, files, or python files, whereas spark-submit
    expects a comma-delimited list of absolute file paths.

    Each comma-separated entry of ``path_str`` is handled independently:

    * ``s3://`` and ``s3a://`` entries are passed through unchanged.
    * Local entries (no scheme, or a ``file:`` URI) must be absolute and must
      exist. A directory is replaced by the resolved paths of the regular
      files directly inside it (subdirectories are NOT traversed); a regular
      file is replaced by its resolved path.
    * NOTE(review): entries with any other scheme are omitted from the
      result without an error -- confirm whether that is intended.

    For example, given "/path/to/jars/dir", this function returns
    "/path/to/jars/dir/file1.jar,/path/to/jars/dir/file2.jar", which is a valid
    value for spark-submit's "--jars" option. The order of files expanded from
    a directory follows ``pathlib.Path.iterdir`` and is therefore arbitrary.

    Args:
        path_str: Non-empty, comma-delimited string of paths.

    Returns:
        A comma-delimited string of expanded paths.

    Raises:
        InputError: wrapping a ValueError when ``path_str`` is empty, or when a
            local entry is not absolute, does not exist, is an empty directory,
            or is neither a regular file nor a directory.
    """
    if not path_str:
        raise InputError(ValueError(f"path {path_str} must not be empty"))

    expanded_paths = []
    for path in path_str.split(","):
        # Parse the individual entry, not the whole argument: otherwise every
        # entry would be classified by the scheme of the first one.
        result = urlparse(path)
        if result.scheme in ("s3", "s3a"):
            expanded_paths.append(path)
        elif result.scheme == "file" or not result.scheme:
            # For file: URIs use only the path component; pathlib would treat
            # the literal "file:..." text as a relative path. Scheme-less
            # entries are used verbatim so characters such as '#' or '?' in a
            # file name are not mistaken for URL fragments/queries.
            local_path = result.path if result.scheme == "file" else path
            file_path = pathlib.Path(local_path)
            if not file_path.is_absolute():
                raise InputError(ValueError(f"file path {file_path} must be an absolute path to a file or directory"))
            file_path = file_path.resolve()

            # In the typical case, file_path points to a directory containing files.
            if not file_path.exists():
                raise InputError(ValueError(f"file path {file_path} does not exist"))

            if file_path.is_dir():
                # Only regular files directly inside the directory are included.
                files = [str(f.resolve()) for f in file_path.iterdir() if f.is_file()]
                if not files:
                    raise InputError(ValueError(f"Found zero files in {file_path}"))
                expanded_paths.extend(files)
            elif file_path.is_file():
                expanded_paths.append(str(file_path))
            else:
                raise InputError(ValueError(f"file at {file_path} is not a regular file or directory"))
    return ",".join(expanded_paths)