in bistro/processes/TaskSubprocessQueue.cpp [554:624]
// Callback that processes kill scheduling for one subprocess.  On each
// invocation it:
//  1. Counts down a previously scheduled SIGKILL (killAfterTicks_) and fires
//     it when the countdown reaches zero.
//  2. Drains every pending request from queue_, folding them into at most
//     one signal to send now (KILL wins over TERM), and possibly scheduling
//     a follow-up SIGKILL for TERM_WAIT_KILL requests.
//  3. Either sends the chosen signal to the child (or to its process group
//     when opts_.processGroupLeader is set), or -- if there is nothing to
//     send -- counts down numPolls_ and, on expiry, reports freshly fetched
//     physical resources through resourceCallback_.
//
// `rt` identifies the task; it is only passed to resourceCallback_ and used
// in log messages.  `proc` is the subprocess to signal.
//
// The function is noexcept: invariant violations abort via CHECK / LOG(FATAL),
// and std::exception-derived failures of the resource fetch are logged and
// swallowed.
void TaskSubprocessState::asyncSubprocessCallback(
    const cpp2::RunningTask& rt,
    folly::Subprocess& proc) noexcept {
  int signal = 0;  // No signal
  // Is it time to send a previously scheduled KILL?
  if (killAfterTicks_ > 0) {
    --killAfterTicks_;
    if (killAfterTicks_ == 0) {
      signal = SIGKILL;
    }
  }
  cpp2::KillRequest kill_req;
  while (queue_.read(kill_req)) {
    switch (*kill_req.method_ref()) {
      case cpp2::KillMethod::KILL:
        signal = SIGKILL;  // KILL takes precedence over an outstanding TERM
        killAfterTicks_ = 0;  // No point in sending already-scheduled kills
        break;
      case cpp2::KillMethod::TERM_WAIT_KILL:
        if (signal == 0) {  // TERM does not replace an outstanding KILL
          signal = SIGTERM;
        }
        // Pick the earlier kill time of the two -- only one SIGKILL will fire.
        {
          // Negative waits are clamped to 0.  The division is done on an
          // unsigned value so that the "1 +" below cannot overflow.
          // NOTE(review): assumes pollMs(opts_) is never 0 -- confirm.
          const uint32_t waitMs =
              static_cast<uint32_t>(std::max(*kill_req.killWaitMs_ref(), 0));
          // The "1 +" matters for two reasons:
          //  - killAfterTicks_ == 0 means "no KILL scheduled", so a wait
          //    shorter than one poll period must still yield >= 1 tick;
          //    otherwise the KILL would never be sent, and an already
          //    scheduled KILL would be cancelled by the std::min() below.
          //  - Integer division truncates, so without it the KILL could
          //    fire before the requested grace period has elapsed.
          const uint32_t ticks = 1 + waitMs / pollMs(opts_);
          killAfterTicks_ =
              (killAfterTicks_ == 0) ? ticks : std::min(ticks, killAfterTicks_);
        }
        break;
      case cpp2::KillMethod::TERM:
        if (signal == 0) {  // TERM does not replace an outstanding KILL
          signal = SIGTERM;
        }
        break;
      default:  // Not reached, checked in TaskSubprocessState::kill()
        LOG(FATAL) << "Unknown kill method: "
                   << static_cast<int>(*kill_req.method_ref());
    }
  }
  // AsyncSubprocess guarantees that the process had not yet been wait()ed
  // for when this callback runs, so sendSignal() is not racy, and is
  // guaranteed to go to the right process / pgid.
  if (signal != 0) {
    // Do not signal the cgroup here because:
    //  - Well-behaved tasks will respond just fine to signaling the PGID.
    //  - Signaling only the cgroup takes longer and can fail in more ways.
    //  - Signaling both can confuse applications that have SIGTERM handling.
    // Therefore, cgroup killing only kicks in after the child has exited.
    CHECK(proc.returnCode().running());
    auto pid = proc.pid();
    // Guard against signaling init (1), "every process" (-1) or our own
    // process group (0) through a bogus PID.
    CHECK(pid > 1);
    if (*opts_.processGroupLeader_ref()) {
      pid = -pid;  // A negative PID makes kill() target the process group.
      CHECK(pid < -1);
    }
    // FATAL since none of the POSIX error conditions can occur, unless we
    // have a serious bug like signaling the wrong PID.
    PLOG_IF(FATAL, ::kill(pid, signal) == -1)
        << "Failed to kill " << pid << " with " << signal;
    wasKilled_ = true;  // Alters "no status" handling, see above.
  } else if (--numPolls_ <= 0) {
    numPolls_ = getNumPolls();  // Reset the number of iterations to wait.
    try {
      resourceCallback_(rt, physicalResourceFetcher_.fetch());
    } catch (const std::exception& ex) {
      // Best-effort: a failed resource fetch must not take down the task.
      LOG(WARNING) << "Failed to fetch resources for task "
                   << apache::thrift::debugString(rt) << ": " << ex.what();
    }
  }
}