in petastorm/hdfs/namenode.py [0:0]
def __init__(self, hadoop_configuration=None):
    """
    Set up the Hadoop configuration used by this resolver.

    A configuration passed in by the caller is stored unchanged. When none is given, the
    environment variables ``HADOOP_HOME``, ``HADOOP_PREFIX`` and ``HADOOP_INSTALL`` are checked
    in that order; the first one that is set is taken as the Hadoop installation path, and a
    fresh dict is handed to ``_load_site_xml_into_dict`` for ``etc/hadoop/hdfs-site.xml`` and
    then ``etc/hadoop/core-site.xml`` under that path. If none of the variables is set, a
    warning is logged and an empty dict is stored instead.

    :param hadoop_configuration: an optional ``HadoopConfiguration``
    """
    self._hadoop_env = None
    self._hadoop_path = None

    # Caller supplied a configuration: nothing to discover.
    if hadoop_configuration is not None:
        self._hadoop_configuration = hadoop_configuration
        return

    # First environment variable that is set wins, in this preferred order.
    candidates = ("HADOOP_HOME", "HADOOP_PREFIX", "HADOOP_INSTALL")
    chosen = next((name for name in candidates if name in os.environ), None)

    # Always end up with a dict so member functions need no further None checks.
    site_settings = {}
    if chosen is None:
        # NOTE(review): the env/path attributes have only been assigned None by this method
        # at this point, so the two placeholders in the message will normally print None.
        logger.warning('Unable to populate a sensible HadoopConfiguration for namenode resolution!\n'
                       'Path of last environment var (%s) tried [%s]. Please set up your Hadoop and \n'
                       'define environment variable HADOOP_HOME to point to your Hadoop installation path.',
                       self._hadoop_env, self._hadoop_path)
    else:
        self._hadoop_env = chosen
        self._hadoop_path = os.environ[chosen]
        # hdfs-site.xml is handed to the loader first, core-site.xml second.
        site_templates = (
            '{}/etc/hadoop/hdfs-site.xml',
            '{}/etc/hadoop/core-site.xml',
        )
        for template in site_templates:
            self._load_site_xml_into_dict(template.format(self._hadoop_path), site_settings)

    self._hadoop_configuration = site_settings