in genie-agent/src/main/java/com/netflix/genie/agent/execution/process/impl/JobProcessManagerImpl.java [205:283]
public JobProcessResult waitFor() throws InterruptedException {
    // Blocks until the launched job process (if any) exits, then maps the outcome to a JobProcessResult:
    //   1. exit code equal to SUCCESS_EXIT_CODE -> SUCCEEDED (takes precedence over a kill request)
    //   2. 'killed' flag raised                 -> KILLED, with a message derived from the kill source
    //   3. anything else                        -> FAILED (setup failure if the init-failed file exists)
    // Throws IllegalStateException if called before launch, and InterruptedException if the calling
    // thread is interrupted while waiting for the process to exit.
    if (!launched.get()) {
        throw new IllegalStateException("Process not launched");
    }

    final Process process = processReference.get();

    // When no process reference is set, the exit code stays at 0 and is never treated as success below.
    int exitCode = 0;
    if (process != null) {
        exitCode = process.waitFor();
        ConsoleLog.getLogger().info("Job process terminated with exit code: {}", exitCode);
    }

    try {
        // Evil-but-necessary little hack.
        // The agent and the child job process receive SIGINT at the same time (e.g. in case of ctrl-c).
        // If the child terminates quickly, the code below will execute before the signal handler has a chance to
        // set the job as killed, and the final status would be (incorrectly) reported as success/failure,
        // depending on exit code, as opposed to killed.
        // So give the handler a chance to raise the 'killed' flag before attempting to read it.
        Thread.sleep(100);
    } catch (final InterruptedException ignored) {
        // Deliberately swallowed: an interrupt here only shortens the grace period above; the process wait
        // is already over and a result must still be produced.
        // NOTE(review): the interrupt status is not restored, matching the previous behavior -- confirm
        // whether callers would prefer Thread.currentThread().interrupt() here.
    }

    // If for whatever reason the timeout thread is currently running or if it is scheduled to be run, cancel it
    final ScheduledFuture<?> timeoutThreadFuture = this.timeoutKillThread.get();
    if (timeoutThreadFuture != null) {
        timeoutThreadFuture.cancel(true);
    }

    // Check the exit code first: a job that finished successfully is reported as SUCCEEDED
    // even if it also received a kill request.
    if (process != null && exitCode == SUCCESS_EXIT_CODE) {
        return new JobProcessResult.Builder(
            JobStatus.SUCCEEDED,
            JobStatusMessages.JOB_FINISHED_SUCCESSFULLY,
            exitCode
        ).build();
    }

    if (this.killed.get()) {
        // A kill with no recorded source is treated as an API kill request.
        final KillService.KillSource source = ObjectUtils.firstNonNull(
            killSource.get(), KillService.KillSource.API_KILL_REQUEST);
        switch (source) {
            case TIMEOUT:
                return new JobProcessResult
                    .Builder(JobStatus.KILLED, JobStatusMessages.JOB_EXCEEDED_TIMEOUT, exitCode)
                    .build();
            case FILES_LIMIT:
                return new JobProcessResult
                    .Builder(JobStatus.KILLED, JobStatusMessages.JOB_EXCEEDED_FILES_LIMIT, exitCode)
                    .build();
            case REMOTE_STATUS_MONITOR:
                return new JobProcessResult
                    .Builder(JobStatus.KILLED, JobStatusMessages.JOB_MARKED_FAILED, exitCode)
                    .build();
            case SYSTEM_SIGNAL:
                // In interactive mode, killed by a system signal is mostly likely by a user (e.g. Ctrl-C).
                // Reported as KILLED like every other kill source; reporting FAILED here would defeat the
                // purpose of the grace period above, which exists so a signal is not mistaken for a failure.
                return new JobProcessResult
                    .Builder(JobStatus.KILLED,
                        this.isInteractiveMode
                            ? JobStatusMessages.JOB_KILLED_BY_USER
                            : JobStatusMessages.JOB_KILLED_BY_SYSTEM,
                        exitCode)
                    .build();
            case API_KILL_REQUEST:
            default:
                return new JobProcessResult
                    .Builder(JobStatus.KILLED, JobStatusMessages.JOB_KILLED_BY_USER, exitCode)
                    .build();
        }
    }

    // Neither successful nor killed: the job failed. The presence of the init-failed file selects the
    // setup-failure message over the generic failure message.
    final File initFailedFile = initFailedFileRef.get();
    final String statusMessage = (initFailedFile != null && initFailedFile.exists())
        ? JobStatusMessages.JOB_SETUP_FAILED : JobStatusMessages.JOB_FAILED;

    return new JobProcessResult.Builder(JobStatus.FAILED, statusMessage, exitCode).build();
}