in community/front-end/ofe/infrastructure_files/gcs_bucket/clusters/ansible_setup/roles/c2_daemon/files/ghpcfe_c2daemon.py [0:0]
def cb_spack_install(message):
    """Handle a Spack install request: submit the build and report progress.

    The build is submitted through ``_spack_submit_build``. If no job id
    comes back, the submission output is uploaded and an ``ACK`` with
    status ``"e"`` is sent. Otherwise the job state is polled every 30
    seconds, ``UPDATE`` messages are sent when the job is queued and when
    it starts running, the job's log files are uploaded after each poll
    while it runs, and a closing ``ACK`` is sent once the job has left the
    running state.

    Args:
        message: Mapping describing the request. ``name``, ``partition``,
            ``spec`` and ``extra_sbatch`` are required keys; ``ackid`` and
            ``app_id`` are optional and default to ``None``.

    Returns:
        None. Results are reported through ``send_message``.

    Raises:
        KeyError: If one of the required keys is absent from ``message``.
    """
    ackid = message.get("ackid")
    appid = message.get("app_id")
    app_name = message["name"]
    logger.info(
        "Starting Spack Install for %s:%s - Message: %s",
        appid,
        app_name,
        message,
    )

    # Local log files written by the build job, and where they land in GCS.
    install_prefix = f"/opt/cluster/installs/{appid}/{app_name}"
    spack_stdout = f"{install_prefix}.out"
    spack_stderr = f"{install_prefix}.err"
    gcs_tgt_out = f"installs/{appid}/stdout"
    gcs_tgt_err = f"installs/{appid}/stderr"

    jobid, outfile, errfile = _spack_submit_build(
        appid,
        message["partition"],
        app_name,
        message["spec"],
        message["extra_sbatch"],
    )

    def job_status(code):
        # Payload shared by every message that carries the job id.
        return {
            "ackid": ackid,
            "app_id": appid,
            "jobid": jobid,
            "status": code,
        }

    def log_targets():
        # Build a fresh mapping on every call so each upload gets its own.
        return {gcs_tgt_out: spack_stdout, gcs_tgt_err: spack_stderr}

    if not jobid:
        # Submission failed; outfile/errfile carry the submission output.
        logger.error(
            "Failed to run batch submission for %s:%s", appid, app_name
        )
        _upload_log_blobs({gcs_tgt_out: outfile, gcs_tgt_err: errfile})
        send_message("ACK", job_status("e"))
        return

    logger.info("Job Queued")
    send_message("UPDATE", job_status("q"))

    # Phase 1: wait for the scheduler to start (or reject) the job.
    state = "PENDING"
    while state in ("PENDING", "CONFIGURING"):
        time.sleep(30)
        state = _slurm_get_job_state(jobid)

    if state == "RUNNING":
        logger.info("Spack build job running for %s:%s", appid, app_name)
        send_message("UPDATE", job_status("i"))

    # Phase 2: while the build runs, push the logs after every poll.
    # Upload failures are logged and do not interrupt the polling.
    while state == "RUNNING":
        time.sleep(30)
        state = _slurm_get_job_state(jobid)
        try:
            _upload_log_files(log_targets())
        except Exception as err:
            logger.error(
                "Failed to upload log files for %s:%s",
                appid,
                app_name,
                exc_info=err,
            )

    logger.info(
        "Job for %s:%s completed with result %s", appid, app_name, state
    )

    # Any end state other than COMPLETED/COMPLETING is reported as "e".
    succeeded = state in ("COMPLETED", "COMPLETING")
    final_update = {
        "ackid": ackid,
        "app_id": appid,
        "status": "r" if succeeded else "e",
    }
    if succeeded:
        # Merge in whatever the install confirmation step reports.
        final_update.update(_spack_confirm_install(app_name, spack_stdout))

    logger.info(
        "Uploading log files for %s:%s - (state: %s)",
        appid,
        app_name,
        final_update["status"],
    )
    # Final upload so the stored logs include the tail of the build output.
    try:
        _upload_log_files(log_targets())
    except Exception as err:
        logger.error("Failed to upload log files", exc_info=err)

    send_message("ACK", final_update)