void TaskSubprocessState::asyncSubprocessCallback()

in bistro/processes/TaskSubprocessQueue.cpp [554:624]


// Per-tick callback for a running task subprocess.  On each invocation it:
//  1. Counts down a previously scheduled SIGKILL (killAfterTicks_), firing
//     it when the countdown reaches zero.
//  2. Drains all pending kill requests from queue_ and folds them into at
//     most one signal for this tick.  SIGKILL always wins over SIGTERM.
//  3. Either delivers that signal to the child (or to its process group if
//     opts_.processGroupLeader is set), or -- when there is nothing to
//     signal -- decrements numPolls_ and, once it runs out, fetches physical
//     resources and passes them to resourceCallback_.
//
// `rt` identifies the task; it is forwarded to resourceCallback_ and used in
// the warning logged if the resource fetch throws.  `proc` is the child
// process to be signaled.
//
// killAfterTicks_ == 0 means "no SIGKILL is scheduled"; any positive value
// is the number of further invocations before SIGKILL is sent.
//
// The function is noexcept: std::exception-derived failures from the
// resource fetch / callback are logged and swallowed; invariant violations
// abort via CHECK / LOG(FATAL).
void TaskSubprocessState::asyncSubprocessCallback(
    const cpp2::RunningTask& rt,
    folly::Subprocess& proc) noexcept {
  int signal = 0;  // No signal
  // Is it time to send a previously scheduled KILL?
  if (killAfterTicks_ > 0) {
    --killAfterTicks_;
    if (killAfterTicks_ == 0) {
      signal = SIGKILL;
    }
  }
  // Drain every queued request so that several requests arriving within one
  // tick collapse into a single signal.
  cpp2::KillRequest kill_req;
  while (queue_.read(kill_req)) {
    switch (*kill_req.method_ref()) {
      case cpp2::KillMethod::KILL:
        signal = SIGKILL;  // KILL takes precedence over an outstanding TERM
        killAfterTicks_ = 0;  // No point in sending already-scheduled kills
        break;
      case cpp2::KillMethod::TERM_WAIT_KILL:
        if (signal == 0) {  // TERM does not replace an outstanding KILL
          signal = SIGTERM;
        }
        // Pick the earlier kill time of the two -- only one SIGKILL will fire.
        {
          // Negative waits are treated as zero before converting to ticks.
          uint32_t ticks =
              std::max(*kill_req.killWaitMs_ref(), 0) / pollMs(opts_);
          // A wait shorter than one poll interval truncates to 0 ticks, but
          // 0 is the "no KILL scheduled" sentinel.  Left as-is, it would
          // either never schedule the follow-up KILL, or (via std::min
          // below) cancel one that is already pending.  Round up to one
          // tick so that the KILL fires on the very next callback.
          if (ticks == 0) {
            ticks = 1;
          }
          killAfterTicks_ =
            (killAfterTicks_ == 0) ? ticks : std::min(ticks, killAfterTicks_);
        }
        break;
      case cpp2::KillMethod::TERM:
        if (signal == 0) {  // TERM does not replace an outstanding KILL
          signal = SIGTERM;
        }
        break;
      default:  // Not reached, checked in TaskSubprocessState::kill()
        LOG(FATAL) << "Unknown kill method: "
                   << static_cast<int>(*kill_req.method_ref());
    }
  }
  // AsyncSubprocess guarantees that the process had not yet been wait()ed
  // for when this callback runs, so sendSignal() is not racy, and is
  // guaranteed to go to the right process / pgid.
  if (signal != 0) {
    // Do not signal the cgroup here because:
    //  - Well-behaved tasks will respond just fine to signaling the PGID.
    //  - Signaling only the cgroup takes longer and can fail in more ways.
    //  - Signaling both can confuse applications that have SIGTERM handling.
    // Therefore, cgroup killing only kicks in after the child has exited.
    CHECK(proc.returnCode().running());
    auto pid = proc.pid();
    // Guard against the special kill() targets: 0 (own process group),
    // 1 (init) and, after negation below, -1 (every process we may signal).
    CHECK(pid > 1);
    if (*opts_.processGroupLeader_ref()) {
      pid = -pid;  // A negative PID makes kill() target the process group.
      CHECK(pid < -1);
    }
    // FATAL since none of the POSIX error conditions can occur, unless we
    // have a serious bug like signaling the wrong PID.
    PLOG_IF(FATAL, ::kill(pid, signal) == -1)
      << "Failed to kill " << pid << " with " << signal;
    wasKilled_ = true;  // Alters "no status" handling, see above.
  } else if (--numPolls_ <= 0) {
    // Resource usage is only sampled on ticks that send no signal; ticks
    // that do send one leave numPolls_ untouched.
    numPolls_ = getNumPolls();  // Reset the number of iterations to wait.
    try {
      resourceCallback_(rt, physicalResourceFetcher_.fetch());
    } catch (const std::exception& ex) {
      // Best-effort: a failed fetch must not escape this noexcept callback.
      LOG(WARNING) << "Failed to fetch resources for task "
        << apache::thrift::debugString(rt) << ": " << ex.what();
    }
  }
}