in petastorm/fs_utils.py [0:0]
def get_filesystem_and_path_or_paths(url_or_urls, hdfs_driver='libhdfs3'):
    """
    Given a url or url list, return a tuple ``(filesystem, path_or_paths)``.

    ``filesystem`` is resolved from the first url, and ``path_or_paths`` is a path or path list
    extracted from the given url(s). If a url list is given, all urls must have the same scheme
    and netloc, because a single filesystem (resolved from the first url) is returned for all of them.

    :param url_or_urls: A single dataset url, or a non-empty list of dataset urls.
    :param hdfs_driver: Passed through unchanged to ``FilesystemResolver``.
    :return: A tuple ``(filesystem, path_or_paths)``. ``path_or_paths`` is a list when a list was
        passed in, otherwise a single path.
    :raises ValueError: If an empty url list is given, or if the urls do not all share the same
        scheme and netloc.
    """
    is_url_list = isinstance(url_or_urls, list)
    url_list = url_or_urls if is_url_list else [url_or_urls]

    # Fail with a clear message instead of an IndexError when indexing the first url below.
    if not url_list:
        raise ValueError('The dataset url list must contain at least one url.')

    parsed_url_list = [urlparse(url) for url in url_list]
    first_parsed_url = parsed_url_list[0]

    # Validate every url before resolving a filesystem, so bad input never triggers a connection.
    if any(parsed_url.scheme != first_parsed_url.scheme or parsed_url.netloc != first_parsed_url.netloc
           for parsed_url in parsed_url_list):
        raise ValueError('The dataset url list must contain url with the same scheme and netloc.')

    fs = FilesystemResolver(url_list[0], hdfs_driver=hdfs_driver).filesystem()
    path_list = [get_dataset_path(parsed_url) for parsed_url in parsed_url_list]

    # Mirror the shape of the input: list in, list out; single url in, single path out.
    path_or_paths = path_list if is_url_list else path_list[0]
    return fs, path_or_paths