in src/smspark/bootstrapper.py [0:0]
def start_hadoop_daemons(self) -> None:
    current_host = self.resource_config["current_host"]
    primary_host = self.resource_config["hosts"][0]
    # TODO: sync with EMR puppet scripts - ensure we are following best practices for starting hdfs/yarn daemons
    cmd_prep_namenode_dir = "rm -rf /opt/amazon/hadoop/hdfs/namenode && mkdir -p /opt/amazon/hadoop/hdfs/namenode"
    cmd_prep_datanode_dir = "rm -rf /opt/amazon/hadoop/hdfs/datanode && mkdir -p /opt/amazon/hadoop/hdfs/datanode"
    cmd_namenode_format = "hdfs namenode -format -force"
    cmd_namenode_start = "hdfs namenode"
    cmd_datanode_start = "hdfs datanode"
    cmd_resourcemanager_start = "yarn resourcemanager"
    cmd_nodemanager_start = "yarn nodemanager"

    if current_host == primary_host:
        subprocess.call(cmd_prep_namenode_dir, shell=True)
        subprocess.call(cmd_prep_datanode_dir, shell=True)
        subprocess.call(cmd_namenode_format, shell=True)
        subprocess.Popen(cmd_namenode_start, shell=True)
        subprocess.Popen(cmd_datanode_start, shell=True)
        subprocess.Popen(cmd_resourcemanager_start, shell=True)
        subprocess.Popen(cmd_nodemanager_start, shell=True)
        # TODO: wait for daemons to stabilize on primary + worker nodes
    else:
        subprocess.call(cmd_prep_datanode_dir, shell=True)
        subprocess.Popen(cmd_datanode_start, shell=True)
        subprocess.Popen(cmd_nodemanager_start, shell=True)
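
The TODO above leaves daemon readiness unchecked: subprocess.Popen returns as soon as the process is spawned, so nothing guarantees the NameNode or ResourceManager is accepting connections yet. Below is a minimal sketch of one way to close that gap, assuming default ports (8020 for the NameNode RPC endpoint, 8032 for the ResourceManager) and a hypothetical wait_for_daemons helper; the actual ports depend on the core-site.xml / yarn-site.xml shipped in the container image, and this helper is not part of the bootstrapper.

    # Hypothetical readiness check for the "wait for daemons to stabilize" TODO.
    # Ports and timeouts are assumptions, not values taken from the bootstrapper.
    import socket
    import time


    def wait_for_daemons(host: str, ports=(8020, 8032), timeout_seconds: float = 300.0) -> None:
        """Block until each daemon port on `host` accepts TCP connections, or raise."""
        deadline = time.monotonic() + timeout_seconds
        for port in ports:  # assumed defaults: 8020 = NameNode RPC, 8032 = ResourceManager
            while True:
                try:
                    with socket.create_connection((host, port), timeout=5.0):
                        break  # daemon is listening; move on to the next port
                except OSError:
                    if time.monotonic() > deadline:
                        raise RuntimeError(f"daemon on {host}:{port} did not come up in time")
                    time.sleep(2.0)

The primary branch could call wait_for_daemons(primary_host) right after launching the Popen processes, and worker hosts could do the same before starting their DataNode/NodeManager, but that wiring is speculative and left as the TODO suggests.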