in petastorm/hdfs/namenode.py [0:0]
def hdfs_connect_namenode(cls, url, driver='libhdfs3', user=None):
    """
    Open a pyarrow HDFS connection to the name node described by ``url``.

    The connect call is kept in this single place so that the driver can be changed easily and the
    connection can be mocked in tests.

    :param url: A parsed URL object for the HDFS end point. Its ``hostname`` and ``port`` are read.
    :param driver: An optional driver identifier. It is only forwarded to pyarrow for versions
        older than 0.17.0.
    :param user: String denoting username when connecting to HDFS. None implies login user.
    :return: Pyarrow HDFS connection object.
    """
    installed = LooseVersion(pyarrow.__version__)
    before_0_12 = installed < LooseVersion('0.12.0')

    # According to pyarrow.hdfs.connect:
    #   host : NameNode. Set to "default" for fs.defaultFS from core-site.xml
    # so 'default' is used as the host name when the url does not carry one (i.e. hdfs:///...)
    host = url.hostname or 'default'
    if not before_0_12:
        # From 0.12.0 on, host and driver are handed over as text strings.
        host = six.text_type(host)
        driver = six.text_type(driver)

    connect_options = {'user': user}
    if installed < LooseVersion('0.17.0'):
        # Support for libhdfs3 was removed in v0.17.0; the driver argument is still passed to
        # older versions for backwards compatibility.
        connect_options['driver'] = driver

    # Fall back to port 8020 when the url does not specify one.
    port = url.port or 8020
    return pyarrow.hdfs.connect(host, port, **connect_options)