def get_filesystem_and_path_or_paths()

in petastorm/fs_utils.py [0:0]


def get_filesystem_and_path_or_paths(url_or_urls, hdfs_driver='libhdfs3'):
    """
    Given a url or url list, return a tuple ``(filesystem, path_or_paths)``.

    ``filesystem`` is obtained from ``FilesystemResolver`` using the first url only, which is why
    every url in a list is required to share that url's scheme and netloc. ``path_or_paths`` is the
    result of applying ``get_dataset_path`` to each parsed url.

    :param url_or_urls: A single url, or a ``list`` of urls. Only ``list`` instances are treated as
        a collection of urls; any other value is handled as one url.
    :param hdfs_driver: Passed through unchanged to ``FilesystemResolver``. Defaults to ``'libhdfs3'``.
    :return: A tuple ``(filesystem, path_or_paths)``. ``path_or_paths`` is a list of paths when a list
        was given (even a one-element list), otherwise a single path.
    :raises ValueError: If an empty list is given, or if the urls in the list do not all have the same
        scheme and netloc.
    """
    is_url_list = isinstance(url_or_urls, list)
    url_list = url_or_urls if is_url_list else [url_or_urls]

    # Fail with a descriptive error instead of an IndexError on the first-element lookup below.
    if not url_list:
        raise ValueError('The dataset url list must contain at least one url.')

    parsed_url_list = [urlparse(url) for url in url_list]

    # A single filesystem object serves all paths, so all urls must point at the same location.
    first_parsed_url = parsed_url_list[0]
    expected_location = (first_parsed_url.scheme, first_parsed_url.netloc)
    if any((parsed_url.scheme, parsed_url.netloc) != expected_location for parsed_url in parsed_url_list[1:]):
        raise ValueError('The dataset url list must contain url with the same scheme and netloc.')

    fs = FilesystemResolver(url_list[0], hdfs_driver=hdfs_driver).filesystem()
    path_list = [get_dataset_path(parsed_url) for parsed_url in parsed_url_list]

    # Mirror the shape of the input: list in -> list out, single url in -> single path out.
    path_or_paths = path_list if is_url_list else path_list[0]

    return fs, path_or_paths