in ansible/roles/slurm/files/scripts/setup.py [0:0]
def setup_controller(args):
    """Run controller setup.

    Executes the controller bring-up steps strictly in sequence: secure the
    config file, install scripts and configuration files, set up keys,
    sudoers and storage, start slurmdbd, register the cluster with
    sacctmgr, start slurmctld and slurmrestd, bring up NFS exports and
    timers, then check service status and call ``sync_slurm()``.

    Args:
        args: Not read anywhere in this function body.

    Raises:
        Whatever ``check_returncode()`` raises when the ``sacctmgr add
        cluster`` command exits with a return code greater than 1 and its
        stdout does not contain "already exists". Other helpers called here
        may raise as well; that is not visible from this function.
    """

    def _run_each(*commands):
        # Issue the given commands one after another, each with timeout=30.
        for command in commands:
            run(command, timeout=30)

    log.info("Setting up controller")
    util.chown_slurm(dirs.scripts / "config.yaml", mode=0o600)
    install_custom_scripts()

    # Configuration steps; every one of them is handed `lkp`.
    install_slurm_conf(lkp)
    install_slurmdbd_conf(lkp)
    gen_cloud_conf(lkp)
    gen_cloud_gres_conf(lkp)
    gen_topology_conf(lkp)
    install_gres_conf(lkp)
    install_cgroup_conf(lkp)
    install_topology_conf(lkp)
    install_jobsubmit_lua(lkp)

    # Keys and sudoers.
    setup_jwt_key()
    setup_munge_key()
    setup_sudoers()

    # Secondary disks are set up only when the config asks for them.
    if cfg.controller_secondary_disk:
        setup_secondary_disks()
    setup_network_storage(log)

    run_custom_scripts()

    # configure_mysql() is skipped when a cloudsql secret is configured.
    if not cfg.cloudsql_secret:
        configure_mysql()

    _run_each(
        "systemctl enable slurmdbd",
        "systemctl restart slurmdbd",
    )

    # Wait for slurmdbd to come up
    time.sleep(5)

    sacctmgr_cmd = f"{slurmdirs.prefix}/bin/sacctmgr -i"
    add_cluster = run(
        f"{sacctmgr_cmd} add cluster {cfg.slurm_cluster_name}",
        timeout=30,
        check=False,
    )
    # check=False above: the outcome is inspected by hand so that an
    # "already exists" reply is only logged instead of being an error.
    if "already exists" in add_cluster.stdout:
        log.info(add_cluster.stdout)
    elif add_cluster.returncode > 1:
        add_cluster.check_returncode()  # will raise error

    _run_each(
        "systemctl enable slurmctld",
        "systemctl restart slurmctld",
        "systemctl enable slurmrestd",
        "systemctl restart slurmrestd",
    )

    # Export at the end to signal that everything is up
    _run_each(
        "systemctl enable nfs-server",
        "systemctl start nfs-server",
    )
    setup_nfs_exports()
    _run_each("systemctl enable --now slurmcmd.timer")

    log.info("Check status of cluster services")
    _run_each(
        "systemctl status munge",
        "systemctl status slurmdbd",
        "systemctl status slurmctld",
        "systemctl status slurmrestd",
    )

    sync_slurm()
    _run_each(
        "systemctl enable slurm_load_bq.timer",
        "systemctl start slurm_load_bq.timer",
        "systemctl status slurm_load_bq.timer",
    )

    log.info("Done setting up controller")