in scripts/slurmsync.py [0:0]
def reconfigure_slurm():
    """Re-apply the Slurm configuration when the fetched config.yaml changed.

    Compares the digest returned by ``fetch_config_yaml_md5()`` with the one
    stored in ``/slurm/scripts/.config.hash``. When they differ, the new
    config is fetched, written to ``CONFIG_FILE``, reloaded, and published as
    ``util.lkp``. What happens next depends on ``lkp.instance_role_safe``:

    * ``"controller"``: regenerate/install the Slurm conf files, restart
      ``slurmctld`` and run ``scontrol reconfigure`` (errors from these two
      commands are logged, not raised).
    * ``"compute"`` / ``"login"``: restart ``slurmd``.
    * any other role: nothing further is done.

    For the handled roles a ``wall`` broadcast announces the update.

    Does nothing when the currently loaded config is marked ``hybrid``.
    """
    CONFIG_HASH = Path("/slurm/scripts/.config.hash")
    update_msg = "*** slurm configuration was updated ***"

    cfg_old = load_config_file(CONFIG_FILE)
    if cfg_old.hybrid:
        # terraform handles generating the config.yaml, don't do it here
        return

    # fetch_config_yaml_md5() returns a hash object; only its hex digest is
    # compared and persisted, so compute it once.
    digest_new: str = fetch_config_yaml_md5().hexdigest()
    hash_old: str = read_hash(CONFIG_HASH)
    if digest_new == hash_old:
        return

    log.debug("Delta detected. Reconfiguring Slurm now.")
    cfg_new = fetch_config_yaml()
    save_config(cfg_new, CONFIG_FILE)
    # Persist the hash only after the config file was written. Saving it
    # first would make a failed save_config() look "already applied" on the
    # next run, and the update would never be retried.
    save_hash(CONFIG_HASH, digest_new)

    # Reload from disk so the lookup reflects exactly what was saved.
    cfg_new = load_config_file(CONFIG_FILE)
    lkp = Lookup(cfg_new)
    util.lkp = lkp

    role = lkp.instance_role_safe
    if role == "controller":
        install_slurm_conf(lkp)
        install_slurmdbd_conf(lkp)
        gen_cloud_conf(lkp)
        gen_cloud_gres_conf(lkp)
        gen_topology_conf(lkp)
        install_gres_conf(lkp)
        install_cgroup_conf(lkp)
        install_topology_conf(lkp)
        log.info("Restarting slurmctld to make changes take effect.")
        try:
            run("sudo systemctl restart slurmctld.service", check=False)
            run(f"{lkp.scontrol} reconfigure", timeout=30)
        except Exception as e:
            # Best effort: a failed restart/reconfigure is logged and the
            # update notification below is still sent.
            log.error(e)
    elif role in ["compute", "login"]:
        log.info("Restarting slurmd to make changes take effect.")
        run("systemctl restart slurmd")
    else:
        # Unhandled role: config was saved, but there is no daemon to
        # restart and no notification to send.
        return

    util.run(f"wall '{update_msg}'", timeout=30)
    log.debug("Done.")