cookbooks/aws-parallelcluster-slurm/templates/default/slurm/resume_program.erb (11 lines of code) (raw):

#!/bin/bash
#
# Slurm ResumeProgram wrapper.
#
# Copies the file named by SLURM_RESUME_FILE to a private temporary file,
# makes that copy group-readable for the cluster admin share group, and then
# runs slurm_resume as the cluster admin user with SLURM_RESUME_FILE pointing
# at the copy. All command-line arguments are forwarded to slurm_resume.
#
# ResumeProgram should read SLURM_RESUME_FILE within ten seconds of starting
# to guarantee that it still exists.
# ref https://slurm.schedmd.com/power_save.html#tolerance

source /etc/profile.d/aws-cli-default-config.sh

SLURM_RESUME_FILE_TMP=$(mktemp)

# Single quotes are required here: the variable must be expanded when the trap
# fires, not when it is registered. A double-quoted trap string is expanded
# immediately, which would freeze whatever value the variable has at this
# point instead of the path to delete at exit.
trap 'rm -f "${SLURM_RESUME_FILE_TMP}"' EXIT

# Snapshot the resume file right away (see the ten-second note above).
cp "${SLURM_RESUME_FILE}" "${SLURM_RESUME_FILE_TMP}"

# mktemp creates the file readable by its owner only; grant read access to the
# share group so the user that slurm_resume runs as can open the copy.
chgrp <%= node['cluster']['cluster_admin_slurm_share_group'] %> "${SLURM_RESUME_FILE_TMP}"
chmod g+r "${SLURM_RESUME_FILE_TMP}"

sudo -u <%= node['cluster']['cluster_admin_user'] %> "SLURM_RESUME_FILE=${SLURM_RESUME_FILE_TMP}" <%= node_virtualenv_path %>/bin/slurm_resume "$@"

# Explicit cleanup on the normal path; the EXIT trap covers every other exit.
# Kept as the last command so the script's exit status is that of this rm.
rm -f "${SLURM_RESUME_FILE_TMP}"