cookbooks/aws-parallelcluster-slurm/templates/default/slurm/resume_program.erb (11 lines of code) (raw):
#!/bin/bash
# Slurm ResumeProgram wrapper.
#
# Copies the file named by SLURM_RESUME_FILE to a private temporary file, makes
# that copy group-readable for the configured share group, and then runs
# slurm_resume as the cluster admin user with SLURM_RESUME_FILE pointing at the
# copy. All command-line arguments are forwarded to slurm_resume unchanged.
#
# ResumeProgram should read SLURM_RESUME_FILE within ten seconds of starting to guarantee that it still exists.
# ref https://slurm.schedmd.com/power_save.html#tolerance
source /etc/profile.d/aws-cli-default-config.sh

# Create the temp file BEFORE installing the trap, and single-quote the trap
# body so the variable is expanded when the trap fires rather than when it is
# defined. (A double-quoted trap installed before the assignment expands to a
# bare `rm -f ` and removes nothing.)
SLURM_RESUME_FILE_TMP=$(mktemp)
trap 'rm -f -- "${SLURM_RESUME_FILE_TMP}"' EXIT

# Snapshot the resume file right away (see the ten-second note above).
cp -- "${SLURM_RESUME_FILE}" "${SLURM_RESUME_FILE_TMP}"

# mktemp creates the file readable by its owner only; hand it to the share
# group and grant group read.
# NOTE(review): presumably this is what lets the sudo target user below read
# the copy -- confirm that user is a member of the share group.
chgrp <%= node['cluster']['cluster_admin_slurm_share_group'] %> "${SLURM_RESUME_FILE_TMP}"
chmod g+r "${SLURM_RESUME_FILE_TMP}"

sudo -u <%= node['cluster']['cluster_admin_user'] %> SLURM_RESUME_FILE="${SLURM_RESUME_FILE_TMP}" <%= node_virtualenv_path %>/bin/slurm_resume "$@"

# Explicit cleanup kept as the final command so the script's exit status stays
# that of this `rm -f`, not that of slurm_resume; the EXIT trap above covers
# every other exit path.
rm -f -- "${SLURM_RESUME_FILE_TMP}"