in src/smspark/bootstrapper.py [0:0]
def write_runtime_cluster_config(self) -> None:
    """Render Hadoop, YARN, and Spark config files with runtime cluster values.

    Resolves the primary node's IP from the SageMaker resource config, appends
    runtime environment exports to the Hadoop/YARN env scripts, and substitutes
    placeholder tokens (``nn_uri``, ``rm_hostname``, ``nm_hostname``,
    ``nm_webapp_address``, ``sd_host``) in the XML/conf templates with the
    resolved host values. Finishes by applying regional configs and the
    calculated YARN/Spark resource allocation.

    Side effects: mutates files under ``Bootstrapper.HADOOP_PATH`` and
    ``Bootstrapper.SPARK_PATH`` in place.
    """
    primary_host = self.resource_config["hosts"][0]
    primary_ip = socket.gethostbyname(primary_host)
    current_host = self.resource_config["current_host"]

    core_site_file_path = Bootstrapper.HADOOP_PATH + "/etc/hadoop/core-site.xml"
    yarn_site_file_path = Bootstrapper.HADOOP_PATH + "/etc/hadoop/yarn-site.xml"
    hadoop_env_file_path = Bootstrapper.HADOOP_PATH + "/etc/hadoop/hadoop-env.sh"
    yarn_env_file_path = Bootstrapper.HADOOP_PATH + "/etc/hadoop/yarn-env.sh"
    spark_conf_file_path = Bootstrapper.SPARK_PATH + "/conf/spark-defaults.conf"

    # Pass through environment variables to hadoop env
    with open(hadoop_env_file_path, "a") as hadoop_env_file:
        hadoop_env_file.write("export SPARK_MASTER_HOST=" + primary_ip + "\n")
        hadoop_env_file.write(
            "export AWS_CONTAINER_CREDENTIALS_RELATIVE_URI="
            + os.environ.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "")
            + "\n"
        )

    # Add YARN log directory. Fix: terminate the export with a newline so a
    # later append to yarn-env.sh does not concatenate onto this shell line.
    with open(yarn_env_file_path, "a") as yarn_env_file:
        yarn_env_file.write("export YARN_LOG_DIR=/var/log/yarn/\n")

    # Configure ip address for name node
    self._substitute_in_file(core_site_file_path, {"nn_uri": primary_ip})

    # Set special regional configs (e.g. S3 endpoint)
    self.set_regional_configs()

    # Configure hostname for resource manager and node manager.
    # Fix: the original wrote yarn-site.xml twice back-to-back; a single
    # write is sufficient.
    self._substitute_in_file(
        yarn_site_file_path,
        {
            "rm_hostname": primary_ip,
            "nm_hostname": current_host,
            "nm_webapp_address": "{}:{}".format(current_host, self.NODEMANAGER_WEBAPP_ADDR_PORT),
        },
    )

    # Point Spark at the driver/master host
    self._substitute_in_file(spark_conf_file_path, {"sd_host": primary_ip})

    # Calculate and set Spark and Yarn resource allocation configs
    self.set_yarn_spark_resource_config()
    logging.info("Finished Yarn configuration files setup.")

def _substitute_in_file(self, file_path: str, replacements: dict) -> None:
    """Replace each placeholder key in *file_path* with its value, in insertion order.

    Reads the whole file, applies ``str.replace`` per mapping entry, and
    writes the result back in place — mirroring the original sequential
    read/replace/write pattern.
    """
    with open(file_path, "r") as template_file:
        contents = template_file.read()
    for placeholder, value in replacements.items():
        contents = contents.replace(placeholder, value)
    with open(file_path, "w") as rendered_file:
        rendered_file.write(contents)