in scripts/setup_network_storage.py [0:0]
def munge_mount_handler(log):
    """Copy munge.key from the shared munge mount into the local munge dir.

    Temporarily mounts the share described by ``cfg.munge_mount`` at
    ``/mnt/munge``, copies ``munge.key`` into ``dirs.munge``, hands the copy
    to user/group ``munge`` with owner-read-only mode, then unmounts the
    share and removes the temporary mount point.

    Returns without doing anything when ``cfg.munge_mount`` is missing (an
    error is logged) or when ``lkp.instance_role`` is ``"controller"``.

    Args:
        log: Logger-like object; only ``info()`` and ``error()`` are called.

    Raises:
        Exception: The last error raised by the mount command once every
            attempt produced by ``util.backoff_delay`` has failed.
        RuntimeError: If ``util.backoff_delay`` yields no attempt at all.
    """
    if not cfg.munge_mount:
        log.error("Missing munge_mount in cfg")
        # Nothing to mount: stop here instead of dereferencing the missing
        # configuration a few lines further down.
        return
    if lkp.instance_role == "controller":
        return

    mount = cfg.munge_mount
    # Fall back to the controller address/host when no server is configured.
    server_ip = mount.server_ip or cfg.slurm_control_addr or cfg.slurm_control_host
    remote_mount = mount.remote_mount
    local_mount = Path("/mnt/munge")
    fs_type = mount.fs_type if mount.fs_type is not None else "nfs"
    mount_options = (
        mount.mount_options
        if mount.mount_options is not None
        else "defaults,hard,intr,_netdev"
    )
    munge_key = Path(dirs.munge / "munge.key")
    is_gcsfuse = fs_type.lower() == "gcsfuse"

    log.info(f"Mounting munge share to: {local_mount}")
    # A previous failed attempt leaves the mount point behind; tolerate it so
    # that a rerun reaches the mount step instead of dying on FileExistsError.
    local_mount.mkdir(parents=True, exist_ok=True)

    if is_gcsfuse:
        if remote_mount is None:
            remote_mount = ""
        cmd = [
            "gcsfuse",
            f"--only-dir={remote_mount}" if remote_mount != "" else None,
            server_ip,
            str(local_mount),
        ]
    else:
        if remote_mount is None:
            remote_mount = Path("/etc/munge")
        cmd = [
            "mount",
            f"--types={fs_type}",
            f"--options={mount_options}" if mount_options != "" else None,
            f"{server_ip}:{remote_mount}",
            str(local_mount),
        ]
    # Optional flags are None placeholders above; drop them so only real
    # arguments are handed to the command runner.
    cmd = [arg for arg in cmd if arg is not None]

    # wait max 120s for munge mount
    timeout = 120
    last_err = None
    for retry, wait in enumerate(util.backoff_delay(0.5, timeout), 1):
        try:
            run(cmd, timeout=timeout)
            break
        except Exception as e:
            log.error(
                f"munge mount failed: '{cmd}' {e}, try {retry}, waiting {wait:0.2f}s"
            )
            time.sleep(wait)
            last_err = e
    else:
        # Loop ended without a successful mount.
        if last_err is None:
            raise RuntimeError("munge mount was never attempted: no retries available")
        raise last_err

    try:
        log.info(f"Copy munge.key from: {local_mount}")
        shutil.copy2(local_mount / "munge.key", munge_key)

        log.info("Restrict permissions of munge.key")
        shutil.chown(munge_key, user="munge", group="munge")
        os.chmod(munge_key, stat.S_IRUSR)
    finally:
        # Always release the share, even when the copy or permission change
        # failed, so the mount is not leaked.
        log.info(f"Unmount {local_mount}")
        if is_gcsfuse:
            run(f"fusermount -u {local_mount}", timeout=timeout)
        else:
            run(f"umount {local_mount}", timeout=timeout)
        # rmdir only removes an empty directory: if anything is still present
        # under the mount point it is left alone and reported as an error
        # rather than being deleted recursively.
        local_mount.rmdir()