def hdfs_connect_namenode()

in petastorm/hdfs/namenode.py [0:0]


    def hdfs_connect_namenode(cls, url, driver='libhdfs3', user=None):
        """
        Open a pyarrow HDFS connection to the namenode described by ``url``.

        Keeping the connect call in one place makes it easy to change the driver and to mock the
        connection in tests.

        :param url: A parsed URL object for the HDFS end point. Only its ``hostname`` and ``port``
          attributes are read; a missing hostname falls back to ``'default'`` and a missing port
          falls back to 8020.
        :param driver: An optional driver identifier. It is forwarded to pyarrow only when the
          installed pyarrow is older than 0.17.0.
        :param user: String denoting username when connecting to HDFS. None implies login user.
        :return: Pyarrow HDFS connection object.
        """
        installed_version = LooseVersion(pyarrow.__version__)

        # pyarrow.hdfs.connect documents that a host of "default" selects fs.defaultFS from
        # core-site.xml, so that is what we use when the url carries no host (i.e. hdfs:///...)
        namenode_host = url.hostname or 'default'
        if installed_version >= LooseVersion('0.12.0'):
            # From 0.12.0 onwards both host and driver are passed as text-type strings.
            namenode_host = six.text_type(namenode_host)
            driver = six.text_type(driver)

        connect_kwargs = {'user': user}
        if installed_version < LooseVersion('0.17.0'):
            # Support for libhdfs3 was removed in v0.17.0; the driver argument is only passed to
            # older versions, for backwards compatibility.
            connect_kwargs['driver'] = driver

        namenode_port = url.port or 8020
        return pyarrow.hdfs.connect(namenode_host, namenode_port, **connect_kwargs)