def start_hadoop_daemons()

in src/smspark/bootstrapper.py


    def start_hadoop_daemons(self) -> None:
        current_host = self.resource_config["current_host"]
        # The first host listed in the resource config acts as the primary node (see the sketch after this listing).
        primary_host = self.resource_config["hosts"][0]

        # TODO: sync with EMR puppet scripts - ensure we are following best practices for starting hdfs/yarn daemons
        cmd_prep_namenode_dir = "rm -rf /opt/amazon/hadoop/hdfs/namenode && mkdir -p /opt/amazon/hadoop/hdfs/namenode"
        cmd_prep_datanode_dir = "rm -rf /opt/amazon/hadoop/hdfs/datanode && mkdir -p /opt/amazon/hadoop/hdfs/datanode"
        cmd_namenode_format = "hdfs namenode -format -force"
        cmd_namenode_start = "hdfs namenode"
        cmd_datanode_start = "hdfs datanode"
        cmd_resourcemanager_start = "yarn resourcemanager"
        cmd_nodemanager_start = "yarn nodemanager"

        if current_host == primary_host:
            # One-time HDFS setup on the primary: recreate the storage directories and format the NameNode (blocking calls).
            subprocess.call(cmd_prep_namenode_dir, shell=True)
            subprocess.call(cmd_prep_datanode_dir, shell=True)
            subprocess.call(cmd_namenode_format, shell=True)
            # Launch the long-running daemons in the background; the primary also runs a DataNode and NodeManager.
            subprocess.Popen(cmd_namenode_start, shell=True)
            subprocess.Popen(cmd_datanode_start, shell=True)
            subprocess.Popen(cmd_resourcemanager_start, shell=True)
            subprocess.Popen(cmd_nodemanager_start, shell=True)
            # TODO: wait for daemons to stabilize on primary + worker nodes
        else:
            # Worker hosts only prepare the DataNode directory and run the DataNode and NodeManager daemons.
            subprocess.call(cmd_prep_datanode_dir, shell=True)
            subprocess.Popen(cmd_datanode_start, shell=True)
            subprocess.Popen(cmd_nodemanager_start, shell=True)
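
The primary/worker split above is driven by the resource configuration that SageMaker provides to the training container (typically /opt/ml/input/config/resourceconfig.json, whose "current_host" and "hosts" keys this method reads). A minimal sketch of that shape and the derived role, using hypothetical host names:

    # Hypothetical resource config; "algo-1"/"algo-2" follow SageMaker's usual host naming.
    resource_config = {"current_host": "algo-1", "hosts": ["algo-1", "algo-2"]}

    primary_host = resource_config["hosts"][0]                    # "algo-1"
    is_primary = resource_config["current_host"] == primary_host  # True only on algo-1

Only the primary host formats HDFS and runs the NameNode and ResourceManager; every host, including the primary, runs a DataNode and NodeManager.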
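
The "wait for daemons to stabilize" TODO could be addressed by polling HDFS until the NameNode responds and reports that safe mode is off. The helper below is a sketch of one possible approach, not part of the container: wait_for_hdfs is a hypothetical name, and it assumes the hdfs CLI is on PATH.

    import subprocess
    import time

    def wait_for_hdfs(timeout_sec: int = 300) -> None:
        """Poll until `hdfs dfsadmin -safemode get` succeeds and reports safe mode OFF."""
        deadline = time.time() + timeout_sec
        while time.time() < deadline:
            result = subprocess.run(
                "hdfs dfsadmin -safemode get", shell=True, capture_output=True, text=True
            )
            if result.returncode == 0 and "OFF" in result.stdout:
                return
            time.sleep(5)
        raise TimeoutError("HDFS NameNode did not leave safe mode within the timeout")

A blocking alternative is `hdfs dfsadmin -safemode wait`, which returns once the NameNode has left safe mode; YARN readiness would need a separate check (for example, polling `yarn node -list`).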