cookbooks/aws-parallelcluster-slurm/templates/default/slurm/head_node/update_munge_key.sh.erb (97 lines of code) (raw):
#!/bin/bash
# This script updates the munge key used in the system.
# It fetches the key from AWS Secrets Manager and exits with an error if no secret ARN is configured.
# The script does not require any argument.
# The optional -d flag skips the compute fleet and login nodes status checks.
#
# Usage: ./update_munge_key.sh [-d]
#
# Stop at the first command that fails.
set -e

# Path of the local munge key file.
MUNGE_KEY_FILE="/etc/munge/munge.key"

# Values rendered into the script by the template.
SECRET_ARN="<%= @munge_key_secret_arn %>"
REGION="<%= node['cluster']['region'] %>"
MUNGE_USER="<%= node['cluster']['munge']['user'] %>"
MUNGE_GROUP="<%= node['cluster']['munge']['group'] %>"
SHARED_DIRECTORY_COMPUTE="<%= node['cluster']['shared_dir'] %>"
SHARED_DIRECTORY_LOGIN="<%= node['cluster']['shared_dir_login_nodes'] %>"

# Option parsing: -d turns off the compute fleet / login nodes status checks.
# Any other option prints the usage on stderr and aborts.
DISABLE_RESOURCE_CHECK=false
while getopts "d" flag; do
  case "${flag}" in
    d)
      DISABLE_RESOURCE_CHECK=true
      ;;
    *)
      echo "Usage: $0 [-d]" >&2
      exit 1
      ;;
  esac
done
# Unless the checks were disabled with -d, refuse to update the key while
# cluster resources are still up: the compute fleet must report STOPPED and,
# when the login nodes check script is installed, that script must succeed.
if [[ "${DISABLE_RESOURCE_CHECK}" != "true" ]]; then
  # Check compute fleet status.
  # The command is guarded explicitly: under `set -e` a failing helper would
  # otherwise terminate the script without any explanation.
  if ! compute_fleet_status=$(get-compute-fleet-status.sh); then
    echo "Failed to retrieve the compute fleet status."
    exit 1
  fi
  if ! grep -q '"status": "STOPPED"' <<< "${compute_fleet_status}"; then
    echo "Compute fleet is not stopped. Please stop it before updating the munge key."
    exit 1
  fi

  # Check LoginNodes status
  CHECK_LOGIN_NODES_SCRIPT_PATH="<%= node['cluster']['scripts_dir'] %>/slurm/check_login_nodes_stopped.sh"
  # The check is skipped when the script is not present on this node.
  if [[ -f "${CHECK_LOGIN_NODES_SCRIPT_PATH}" ]]; then
    # A non-zero exit status from the script aborts the update.
    # The path is quoted so it is executed as a single word.
    if ! "${CHECK_LOGIN_NODES_SCRIPT_PATH}"; then
      exit 1
    fi
  fi
fi
# Fetch the base64-encoded munge key from Secrets Manager, validate it and
# install it as MUNGE_KEY_FILE. A secret ARN is mandatory: without one the
# script aborts.
if [[ -n "${SECRET_ARN}" ]]; then
  echo "Fetching munge key from AWS Secrets Manager: ${SECRET_ARN}"
  # Guard the call so that a failing AWS CLI reaches the error message below
  # instead of terminating the script silently because of `set -e`.
  encoded_key=$(aws secretsmanager get-secret-value \
    --secret-id "${SECRET_ARN}" \
    --query 'SecretString' \
    --output text \
    --region "${REGION}") || encoded_key=""
  if [[ -z "${encoded_key}" ]]; then
    echo "Error fetching munge key from Secrets Manager or the key is empty"
    exit 1
  fi

  # Verify that the secret is valid base64. The decoded bytes are discarded:
  # binary data cannot be held safely in a shell variable (NUL bytes are lost),
  # and testing the pipeline inside `if` keeps `set -e` from aborting before
  # the error message is printed.
  if ! printf "%s" "${encoded_key}" | base64 -d > /dev/null; then
    echo "Error decoding the munge key with base64"
    exit 1
  fi

  # Check munge key size (in bytes): 32..1024 bytes, i.e. 256..8192 bits.
  key_size=$(printf "%s" "${encoded_key}" | base64 -d | wc -c)
  if (( key_size < 32 || key_size > 1024 )); then
    echo "Fetched munge key size is out of valid range [256-8192 bits]."
    exit 1
  fi

  # Write the decoded key under a restrictive umask so that a newly created
  # file is never readable by other users, even before chmod runs. The
  # subshell keeps the umask change local to this write.
  (
    umask 077
    printf "%s" "${encoded_key}" | base64 -d > "${MUNGE_KEY_FILE}"
  )

  # Set ownership on the key
  chown "${MUNGE_USER}:${MUNGE_GROUP}" "${MUNGE_KEY_FILE}"
  # Enforce correct permission on the key (also covers a pre-existing file)
  chmod 0600 "${MUNGE_KEY_FILE}"
else
  echo "MUNGE KEY SECRET ARN isn't provided"
  exit 1
fi
# Make sure munge is enabled, then restart it so it loads the key file.
systemctl enable munge
echo "Restarting munge service"
systemctl restart munge

# Pause briefly before probing the service state.
sleep 5

# Abort unless systemd reports the service as active.
if ! systemctl --quiet is-active munge; then
  echo "Failed to restart munge service"
  exit 1
fi
echo "Munge service is active"
<% if node["cluster"]["node_type"] != "ExternalSlurmDbd" -%>
# Share munge key
# Copy the key into a private ".munge" directory under each shared directory.
# Each path is handled as a single quoted word (no word-splitting or globbing),
# and an empty setting is skipped explicitly.
for dir in "${SHARED_DIRECTORY_COMPUTE}" "${SHARED_DIRECTORY_LOGIN}"; do
  if [[ -z "${dir}" ]]; then
    continue
  fi
  echo "Sharing munge key to $dir"
  mkdir -p "$dir/.munge"
  # Restrict the directory before the key is copied into it.
  chmod 0700 "$dir/.munge"
  cp "${MUNGE_KEY_FILE}" "$dir/.munge/.munge.key"
  chmod 0600 "$dir/.munge/.munge.key"
done
echo "Shared munge key"
<% end -%>
exit 0