azure-slurm-install/install.py
import argparse
import json
import logging
import logging.config
import os
import re
import subprocess
import sys
import installlib as ilib
from typing import Dict, Optional
# Legacy: used for connecting to Azure MariaDB, which is deprecated.
LOCAL_AZURE_CA_PEM = "AzureCA.pem"
class InstallSettings:
def __init__(self, config: Dict, platform_family: str, mode: str) -> None:
self.config = config
if "slurm" not in config:
config["slurm"] = {}
if "accounting" not in config["slurm"]:
            config["slurm"]["accounting"] = {}
if "user" not in config["slurm"]:
config["slurm"]["user"] = {}
if "munge" not in config:
config["munge"] = {}
if "user" not in config["munge"]:
config["munge"]["user"] = {}
self.autoscale_dir = (
config["slurm"].get("autoscale_dir") or "/opt/azurehpc/slurm"
)
self.cyclecloud_cluster_name = config["cluster_name"]
        # We use a "safe" form of the CycleCloud ClusterName:
        # lowercase the cluster name, then replace anything that is not a letter,
        # digit or '-' with a '-', e.g. "My Cluster" becomes "my-cluster".
        # This is needed because cluster names are used to create hostnames, and
        # hostname conventions do not allow underscores or spaces
        # (https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names).
        # Since https://github.com/Azure/cyclecloud-slurm/pull/241 we also have to use
        # the cluster name to create a database name if one is not provided, but
        # database naming conventions conflict with hostname naming conventions:
        # database names cannot contain '-' hyphens. For now we use a second sanitized
        # cluster name that derives from the escaped cluster name but converts all
        # hyphens to underscores.
self.slurm_cluster_name = _escape(self.cyclecloud_cluster_name)
self.slurm_db_cluster_name = re.sub(r'-', '_', self.slurm_cluster_name)
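        # Example: cluster name "My Cluster" -> slurm_cluster_name "my-cluster"
        # -> slurm_db_cluster_name "my_cluster".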
self.node_name = config["node_name"]
self.hostname = config["hostname"]
self.ipv4 = config["ipaddress"]
self.slurmver = config["slurm"]["version"]
self.vm_size = config["azure"]["metadata"]["compute"]["vmSize"]
self.slurm_user: str = config["slurm"]["user"].get("name") or "slurm"
self.slurm_grp: str = config["slurm"]["user"].get("group") or "slurm"
self.slurm_uid: str = config["slurm"]["user"].get("uid") or "11100"
self.slurm_gid: str = config["slurm"]["user"].get("gid") or "11100"
self.munge_user: str = config["munge"]["user"].get("name") or "munge"
self.munge_grp: str = config["munge"]["user"].get("group") or "munge"
self.munge_uid: str = config["munge"]["user"].get("uid") or "11101"
self.munge_gid: str = config["munge"]["user"].get("gid") or "11101"
self.acct_enabled: bool = config["slurm"]["accounting"].get("enabled", False)
self.acct_user: Optional[str] = config["slurm"]["accounting"].get("user")
self.acct_pass: Optional[str] = config["slurm"]["accounting"].get("password")
self.acct_url: Optional[str] = config["slurm"]["accounting"].get("url")
self.acct_cert_url: Optional[str] = config["slurm"]["accounting"].get("certificate_url")
        self.acct_storageloc: Optional[str] = config["slurm"]["accounting"].get("storageloc")
self.use_nodename_as_hostname = config["slurm"].get(
"use_nodename_as_hostname", False
)
self.node_name_prefix = config["slurm"].get("node_prefix")
if self.node_name_prefix:
self.node_name_prefix = re.sub(
"[^a-zA-Z0-9-]", "-", self.node_name_prefix
).lower()
self.ensure_waagent_monitor_hostname = config["slurm"].get(
"ensure_waagent_monitor_hostname", True
)
self.platform_family = platform_family
self.mode = mode
self.dynamic_config = config["slurm"].get("dynamic_config", None)
self.dynamic_feature = config["slurm"].get("dynamic_feature", None)
        # TODO: dynamic_config will be deprecated; remove in 4.x.
if self.dynamic_config:
self.dynamic_config = _inject_vm_size(self.dynamic_config, self.vm_size)
elif self.dynamic_feature:
self.dynamic_feature = f"{self.dynamic_feature},{self.vm_size}"
self.max_node_count = int(config["slurm"].get("max_node_count", 10000))
        self.additional_slurm_config = (
            config["slurm"].get("additional", {}).get("config")
        )
self.secondary_scheduler_name = config["slurm"].get("secondary_scheduler_name")
self.is_primary_scheduler = config["slurm"].get("is_primary_scheduler", self.mode == "scheduler")
self.config_dir = f"/sched/{self.slurm_cluster_name}"
# Leave the ability to disable this.
self.ubuntu22_waagent_fix = config["slurm"].get("ubuntu22_waagent_fix", True)
def _inject_vm_size(dynamic_config: str, vm_size: str) -> str:
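    """
    Prepend the VM size to any user-supplied Feature list in dynamic_config.
    For example (hypothetical values),
        _inject_vm_size("-Z --conf Feature=dyn", "Standard_F4s_v2")
    returns '-Z --conf Feature=Standard_F4s_v2,dyn'.
    """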
lc = dynamic_config.lower()
if "feature=" not in lc:
logging.warning("Dynamic config is specified but no 'Feature={some_flag}' is set under slurm.dynamic_config.")
return dynamic_config
else:
ret = []
for tok in dynamic_config.split():
if tok.lower().startswith("feature="):
ret.append(f"Feature={vm_size},{tok[len('Feature='):]}")
else:
ret.append(tok)
return " ".join(ret)
def setup_config_dir(s: InstallSettings) -> None:
    # Set up the config dir inside the shared /sched mount.
if s.is_primary_scheduler:
ilib.directory(s.config_dir, owner="root", group="root", mode=755)
def _escape(s: str) -> str:
return re.sub("[^a-zA-Z0-9-]", "-", s).lower()
def setup_users(s: InstallSettings) -> None:
# Set up users for Slurm and Munge
ilib.group(s.slurm_grp, gid=s.slurm_gid)
ilib.user(
s.slurm_user,
comment="User to run slurmctld",
shell="/bin/false",
uid=s.slurm_uid,
gid=s.slurm_gid,
)
ilib.group(s.munge_user, gid=s.munge_gid)
ilib.user(
s.munge_user,
comment="User to run munged",
shell="/bin/false",
uid=s.munge_uid,
gid=s.munge_gid,
)
def run_installer(s: InstallSettings, path: str, mode: str) -> None:
subprocess.check_call([path, mode, s.slurmver])
def fix_permissions(s: InstallSettings) -> None:
# Fix munge permissions and create key
ilib.directory(
"/var/lib/munge",
owner=s.munge_user,
group=s.munge_grp,
mode=711,
recursive=True,
)
ilib.directory(
"/var/log/munge", owner="root", group="root", mode=700, recursive=True
)
ilib.directory(
"/run/munge", owner=s.munge_user, group=s.munge_grp, mode=755, recursive=True
)
ilib.directory(f"{s.config_dir}/munge", owner=s.munge_user, group=s.munge_grp, mode=700)
# Set up slurm
ilib.user(s.slurm_user, comment="User to run slurmctld", shell="/bin/false")
# add slurm to cyclecloud so it has access to jetpack / userdata
if os.path.exists("/opt/cycle/jetpack"):
ilib.group_members("cyclecloud", members=[s.slurm_user], append=True)
ilib.directory("/var/spool/slurmd", owner=s.slurm_user, group=s.slurm_grp)
ilib.directory("/var/log/slurmd", owner=s.slurm_user, group=s.slurm_grp)
ilib.directory("/var/log/slurmctld", owner=s.slurm_user, group=s.slurm_grp)
def munge_key(s: InstallSettings) -> None:
ilib.directory(
"/etc/munge", owner=s.munge_user, group=s.munge_grp, mode=700, recursive=True
)
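    # The scheduler generates a 1024-byte random key in the shared config dir the first
    # time; every node (scheduler, login and execute) then copies it to /etc/munge/munge.key.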
if s.mode == "scheduler" and not os.path.exists(f"{s.config_dir}/munge.key"):
        # TODO: this should only happen on the primary scheduler;
        # skip it for secondary HA nodes.
with open("/dev/urandom", "rb") as fr:
buf = bytes()
while len(buf) < 1024:
buf = buf + fr.read(1024 - len(buf))
ilib.file(
f"{s.config_dir}/munge.key",
content=buf,
owner=s.munge_user,
group=s.munge_grp,
mode=700,
)
ilib.copy_file(
f"{s.config_dir}/munge.key",
"/etc/munge/munge.key",
owner=s.munge_user,
group=s.munge_grp,
mode="0600",
)
def accounting(s: InstallSettings) -> None:
if s.mode != "scheduler":
return
if s.is_primary_scheduler:
_accounting_primary(s)
_accounting_all(s)
def _accounting_primary(s: InstallSettings) -> None:
"""
Only the primary scheduler should be creating files under
{s.config_dir} for accounting.
"""
if s.secondary_scheduler_name:
secondary_scheduler = ilib.await_node_hostname(
s.config, s.secondary_scheduler_name
)
if not s.acct_enabled:
logging.info("slurm.accounting.enabled is false, skipping this step.")
ilib.file(
f"{s.config_dir}/accounting.conf",
owner=s.slurm_user,
group=s.slurm_grp,
content="AccountingStorageType=accounting_storage/none",
)
return
ilib.file(
f"{s.config_dir}/accounting.conf",
owner=s.slurm_user,
group=s.slurm_grp,
content=f"""
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost={s.hostname}
AccountingStorageTRES=gres/gpu
""",
)
    # Previously a CA certificate was required when connecting to any Azure MariaDB
    # instance, which is why we ship LOCAL_AZURE_CA_PEM.
if s.acct_cert_url and s.acct_cert_url != LOCAL_AZURE_CA_PEM:
logging.info(f"Downloading {s.acct_cert_url} to {s.config_dir}/AzureCA.pem")
subprocess.check_call(
[
"wget",
"-O",
f"{s.config_dir}/AzureCA.pem",
s.acct_cert_url,
]
)
ilib.chown(
f"{s.config_dir}/AzureCA.pem", owner=s.slurm_user, group=s.slurm_grp
)
ilib.chmod(f"{s.config_dir}/AzureCA.pem", mode="0600")
elif s.acct_cert_url and s.acct_cert_url == LOCAL_AZURE_CA_PEM:
ilib.copy_file(
LOCAL_AZURE_CA_PEM,
f"{s.config_dir}/AzureCA.pem",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0600",
)
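    # Whichever certificate was placed in the config dir above is later symlinked to
    # /etc/slurm/AzureCA.pem by _accounting_all, which is the path StorageParameters
    # references below.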
# Configure slurmdbd.conf
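    # The default StorageLoc is "<cluster>_acct_db", built from the underscore-sanitized
    # cluster name because database names cannot contain hyphens.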
ilib.template(
f"{s.config_dir}/slurmdbd.conf",
owner=s.slurm_user,
group=s.slurm_grp,
source="templates/slurmdbd.conf.template",
mode=600,
variables={
"accountdb": s.acct_url or "localhost",
"dbuser": s.acct_user or "root",
"dbdhost": s.hostname,
"storagepass": f"StoragePass={s.acct_pass}" if s.acct_pass else "#StoragePass=",
"storage_parameters": "StorageParameters=SSL_CA=/etc/slurm/AzureCA.pem"
if s.acct_cert_url
else "#StorageParameters=",
"slurmver": s.slurmver,
"storageloc": s.acct_storageloc or f"{s.slurm_db_cluster_name}_acct_db",
},
)
if s.secondary_scheduler_name:
ilib.append_file(
f"{s.config_dir}/accounting.conf",
content=f"AccountingStorageBackupHost={secondary_scheduler.hostname}\n",
comment_prefix="\n# Additional HA Storage Backup host -"
)
ilib.append_file(
f"{s.config_dir}/slurmdbd.conf",
content=f"DbdBackupHost={secondary_scheduler.hostname}\n",
comment_prefix="\n# Additional HA dbd host -"
)
def _accounting_all(s: InstallSettings) -> None:
"""
Perform linking and enabling of slurmdbd
"""
# This used to be required for all installations, but it is
# now optional, so only create the link if required.
original_azure_ca_pem = f"{s.config_dir}/AzureCA.pem"
if os.path.exists(original_azure_ca_pem):
ilib.link(
f"{s.config_dir}/AzureCA.pem",
"/etc/slurm/AzureCA.pem",
owner=s.slurm_user,
group=s.slurm_grp,
)
# Link shared slurmdbd.conf to real config file location
ilib.link(
f"{s.config_dir}/slurmdbd.conf",
"/etc/slurm/slurmdbd.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.enable_service("slurmdbd")
def complete_install(s: InstallSettings) -> None:
if s.mode == "scheduler":
if s.is_primary_scheduler:
_complete_install_primary(s)
_complete_install_all(s)
else:
_complete_install_all(s)
def _complete_install_primary(s: InstallSettings) -> None:
"""
Only the primary scheduler should be creating files under {s.config_dir}.
"""
assert s.is_primary_scheduler
secondary_scheduler = None
if s.secondary_scheduler_name:
secondary_scheduler = ilib.await_node_hostname(
s.config, s.secondary_scheduler_name
)
state_save_location = f"{s.config_dir}/spool/slurmctld"
if not os.path.exists(state_save_location):
ilib.directory(state_save_location, owner=s.slurm_user, group=s.slurm_grp)
if not os.path.exists(f"{s.config_dir}/prolog.d"):
ilib.directory(f"{s.config_dir}/prolog.d", owner=s.slurm_user, group=s.slurm_grp)
if not os.path.exists(f"{s.config_dir}/epilog.d"):
ilib.directory(f"{s.config_dir}/epilog.d", owner=s.slurm_user, group=s.slurm_grp)
ilib.template(
f"{s.config_dir}/slurm.conf",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0644",
source="templates/slurm.conf.template",
variables={
"slurmctldhost": f"{s.hostname}({s.ipv4})",
"cluster_name": s.slurm_cluster_name,
"max_node_count": s.max_node_count,
"state_save_location": state_save_location,
"prolog": "/etc/slurm/prolog.d/*",
"epilog": "/etc/slurm/epilog.d/*"
},
)
if secondary_scheduler:
ilib.append_file(
f"{s.config_dir}/slurm.conf",
content=f"SlurmCtldHost={secondary_scheduler.hostname}({secondary_scheduler.private_ipv4})\n",
comment_prefix="\n# Additional HA scheduler host -",
)
    if s.additional_slurm_config:
        ilib.append_file(
            f"{s.config_dir}/slurm.conf",
            content=s.additional_slurm_config,
            comment_prefix="\n# Additional config from CycleCloud -",
        )
ilib.template(
f"{s.config_dir}/cgroup.conf",
owner=s.slurm_user,
group=s.slurm_grp,
        source="templates/cgroup.conf.template",
mode="0644",
)
if not os.path.exists(f"{s.config_dir}/azure.conf"):
ilib.file(
f"{s.config_dir}/azure.conf",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0644",
content="",
)
if not os.path.exists(f"{s.config_dir}/keep_alive.conf"):
ilib.file(
f"{s.config_dir}/keep_alive.conf",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0644",
content="# Do not edit this file. It is managed by azslurm",
)
if not os.path.exists(f"{s.config_dir}/gres.conf"):
ilib.file(
f"{s.config_dir}/gres.conf",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0644",
content="",
)
if not os.path.exists(f"{s.config_dir}/plugstack.conf"):
ilib.file(
f"{s.config_dir}/plugstack.conf",
owner=s.slurm_user,
group=s.slurm_grp,
mode="0644",
            content="include /etc/slurm/plugstack.conf.d/*"
)
def _complete_install_all(s: InstallSettings) -> None:
ilib.link(
f"{s.config_dir}/gres.conf",
"/etc/slurm/gres.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/slurm.conf",
"/etc/slurm/slurm.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/cgroup.conf",
"/etc/slurm/cgroup.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/azure.conf",
"/etc/slurm/azure.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/keep_alive.conf",
"/etc/slurm/keep_alive.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/plugstack.conf",
"/etc/slurm/plugstack.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/prolog.d",
"/etc/slurm/prolog.d",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.link(
f"{s.config_dir}/epilog.d",
"/etc/slurm/epilog.d",
owner=s.slurm_user,
group=s.slurm_grp,
)
if not os.path.exists("/etc/slurm/plugstack.conf.d"):
os.makedirs("/etc/slurm/plugstack.conf.d")
ilib.directory("/etc/slurm/plugstack.conf.d",
owner=s.slurm_user,
group=s.slurm_grp,
)
    # Link accounting.conf regardless of whether accounting is enabled.
ilib.link(
f"{s.config_dir}/accounting.conf",
"/etc/slurm/accounting.conf",
owner=s.slurm_user,
group=s.slurm_grp,
)
ilib.template(
"/etc/security/limits.d/slurm-limits.conf",
source="templates/slurm-limits.conf",
owner="root",
group="root",
mode=644,
)
ilib.directory(
"/etc/systemd/system/slurmctld.service.d", owner="root", group="root", mode=755
)
ilib.template(
"/etc/systemd/system/slurmctld.service.d/override.conf",
source="templates/slurmctld.override",
owner="root",
group="root",
mode=644,
)
ilib.directory(
"/etc/systemd/system/munge.service.d", owner="root", group="root", mode=755
)
ilib.template(
"/etc/systemd/system/munge.service.d/override.conf",
source="templates/munge.override",
owner="root",
group="root",
mode=644,
)
ilib.enable_service("munge")
ilib.template(
"/etc/slurm/job_submit.lua.azurehpc.example",
source="templates/job_submit.lua",
owner="root",
group="root",
mode=644,
)
def get_gres_count(hostname: str) -> int:
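    """
    Return the GPU count configured in /etc/slurm/gres.conf for this hostname.
    Illustrative gres.conf line it can parse (hypothetical values):
        NodeName=dyn-[1-4] Name=gpu Count=2 File=/dev/nvidia[0-1]
    "scontrol show hostnames" expands the bracketed host list locally, so a node
    named dyn-2 would resolve to Count=2 here.
    """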
count = 0
try:
with open("/etc/slurm/gres.conf", 'r') as file:
for line in file:
nodename_match = re.search(r'Nodename=([^\s]+)', line, re.IGNORECASE)
count_match = re.search(r'count=(\d+)', line, re.IGNORECASE)
if nodename_match and count_match:
nodename = nodename_match.group(1)
# This command is local to the node and does not send an RPC to the controller.
if hostname in subprocess.run(['scontrol', 'show', 'hostnames', nodename], stdout=subprocess.PIPE, universal_newlines=True).stdout:
count = int(count_match.group(1))
    except Exception as e:
        logging.error(f"Failed to parse /etc/slurm/gres.conf for {hostname}: {e}")
return count
def setup_slurmd(s: InstallSettings) -> None:
slurmd_config = f"SLURMD_OPTIONS=-b -N {s.node_name}"
if s.dynamic_feature or s.dynamic_config:
if s.dynamic_feature:
override_conf = ""
            # Dynamic GPU nodes require the user to define their gres manually before
            # they can be started. If gres is defined for this node, add it to the
            # configuration options.
gpu_count = get_gres_count(s.node_name)
if gpu_count > 0:
gres_str = f"gres=gpu:{gpu_count}"
override_conf += f" {gres_str}"
override_conf += f" Feature={s.dynamic_feature}"
dynamic_config = f"-Z --conf \"{override_conf}\""
else:
            # The user supplied dynamic config directly in the template.
            # TODO: dynamic_config will be removed in 4.x.
            dynamic_config = s.dynamic_config
        logging.debug("Dynamic config: %s", dynamic_config)
slurmd_config = f"SLURMD_OPTIONS={dynamic_config} -N {s.node_name}"
if "-b" not in slurmd_config.split():
slurmd_config = slurmd_config + " -b"
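    # Illustrative result for a dynamic GPU node (hypothetical node name and feature):
    #   SLURMD_OPTIONS=-Z --conf " gres=gpu:2 Feature=dyn,Standard_NC6s_v3" -N dyn-1 -b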
ilib.file(
"/etc/sysconfig/slurmd" if s.platform_family == "rhel" else "/etc/default/slurmd",
content=slurmd_config,
owner=s.slurm_user,
group=s.slurm_grp,
mode="0700",
)
ilib.enable_service("slurmd")
def set_hostname(s: InstallSettings) -> None:
if not s.use_nodename_as_hostname:
return
if s.is_primary_scheduler:
return
new_hostname = s.node_name.lower()
    if s.mode != "execute" and s.node_name_prefix and not new_hostname.startswith(s.node_name_prefix):
new_hostname = f"{s.node_name_prefix}{new_hostname}"
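    # e.g. a login node named "login-1" with node_prefix "c7-" becomes hostname
    # "c7-login-1" (hypothetical names); execute nodes just use their lowercased node name.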
ilib.set_hostname(
new_hostname, s.platform_family, s.ensure_waagent_monitor_hostname
)
if _is_at_least_ubuntu22() and s.ubuntu22_waagent_fix:
logging.warning("Restarting systemd-networkd to fix waagent/hostname issue on Ubuntu 22.04." +
" To disable this, set slurm.ubuntu22_waagent_fix=false under this" +
" node/nodearray's [[[configuration]]] section")
subprocess.check_call(["systemctl", "restart", "systemd-networkd"])
def _is_at_least_ubuntu22() -> bool:
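    """
    Best-effort check based on /etc/os-release, e.g. lines like:
        ID=ubuntu
        VERSION_ID="22.04"
    VERSION_ID is compared lexically, which is sufficient for Ubuntu's YY.MM versions.
    """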
if not os.path.exists("/etc/os-release"):
return False
lsb_rel = {}
with open("/etc/os-release") as fr:
for line in fr:
line = line.strip()
if not line:
continue
if "=" not in line:
continue
key, val = line.split("=", 1)
lsb_rel[key.strip().upper()] = val.strip('"').strip().lower()
if lsb_rel.get("ID") == "ubuntu" and lsb_rel.get("VERSION_ID", "") >= "22.04":
return True
return False
def _load_config(bootstrap_config: str) -> Dict:
if bootstrap_config == "jetpack":
config = json.loads(subprocess.check_output(["jetpack", "config", "--json"]))
else:
with open(bootstrap_config) as fr:
config = json.load(fr)
if "cluster_name" not in config:
config["cluster_name"] = config["cyclecloud"]["cluster"]["name"]
config["node_name"] = config["cyclecloud"]["node"]["name"]
return config
def main() -> None:
    # Configure logging from install_logging.conf, if present.
if os.path.exists("install_logging.conf"):
logging.config.fileConfig("install_logging.conf")
parser = argparse.ArgumentParser()
parser.add_argument(
"--platform", default="rhel", choices=["rhel", "ubuntu", "suse", "debian"]
)
parser.add_argument(
"--mode", default="scheduler", choices=["scheduler", "execute", "login"]
)
parser.add_argument("--bootstrap-config", default="jetpack")
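    # When --bootstrap-config is a file path rather than "jetpack", it should point at a
    # JSON document shaped roughly like this (hypothetical values):
    #   {"cluster_name": "c1", "node_name": "scheduler-1", "hostname": "scheduler-1",
    #    "ipaddress": "10.0.0.4", "slurm": {"version": "23.02.7-1"},
    #    "azure": {"metadata": {"compute": {"vmSize": "Standard_D4s_v3"}}}}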
args = parser.parse_args()
if args.platform == "debian":
args.platform = "ubuntu"
config = _load_config(args.bootstrap_config)
settings = InstallSettings(config, args.platform, args.mode)
    # create the config dir
setup_config_dir(settings)
# create the users
setup_users(settings)
# create the munge key and/or copy it to /etc/munge/
munge_key(settings)
    # run the per-platform installer script (rhel.sh, ubuntu.sh, or suse.sh) to install the packages
run_installer(settings, os.path.abspath(f"{args.platform}.sh"), args.mode)
# various permissions fixes
fix_permissions(settings)
complete_install(settings)
if settings.mode == "scheduler":
accounting(settings)
        # TODO: add log rotation for return_to_idle.log
ilib.cron(
"return_to_idle",
minute="*/5",
command=f"{settings.autoscale_dir}/return_to_idle.sh 1>&2 >> {settings.autoscale_dir}/logs/return_to_idle.log",
)
set_hostname(settings)
if settings.mode == "execute":
setup_slurmd(settings)
if __name__ == "__main__":
try:
main()
    except Exception:
        print(
            "An error occurred during installation. See log file /var/log/azure-slurm-install.log for details.",
            file=sys.stderr,
        )
        logging.exception("An error occurred during installation.")
sys.exit(1)