std::pair computeState()

in bistro/remote/RemoteWorkerState.h [89:137]


  std::pair<State, bool> computeState(
    int64_t cur_time,
    int32_t max_healthcheck_gap,
    int32_t max_heartbeat_gap,
    int32_t lose_unhealthy_worker_after,
    // Not part of the state since it MUST be ephemeral -- we only want the
    // "consensus allows a worker to become healthy" flag to be used if it
    // makes the worker healthy *immediately*.
    bool allowed_to_become_healthy
  ) const {
    bool disallowed = false;
    if (state_ == State::MUST_DIE) {  // Can never leave this state
      return std::make_pair(State::MUST_DIE, disallowed);
    }
    State new_state = State::HEALTHY;
    // The ways to leave the NEW state are: (i) go to MUST_DIE after
    // lose_unhealthy_worker_after seconds, or (ii) via
    // RemoteWorker::initializeRunningTasks or BistroWorkerHandler::heartbeat
    if (state_ == State::NEW) {
      new_state = State::NEW;
    } else if (
      (cur_time > timeLastGoodHealthcheckSent_ + max_healthcheck_gap)
      || (cur_time > timeLastHeartbeatReceived_ + max_heartbeat_gap)
    ) {
      new_state = State::UNHEALTHY;
    } else if (!allowed_to_become_healthy && !hasBeenHealthy_) {
      new_state = State::UNHEALTHY;
      disallowed = true;
    }

    if (
      // This is ONLY true when the worker is otherwise healthy, but is
      // blocked by consensus.  Don't lose such workers, since that behavior
      // is actively harmful when we are having trouble achieving consensus
      // due to high worker turnover (see README.worker_set_consensus).
      !disallowed &&
      lose_unhealthy_worker_after > 0 &&
      // Without this check, we'd use a stale timeBecameUnhealthy_ when
      // changing from HEALTHY to UNHEALTHY.  Using != matches NEW.
      new_state != State::HEALTHY && state_ != State::HEALTHY &&
      // For NEW workers, the timeout begins at initialization time.
      cur_time > timeBecameUnhealthy_ + lose_unhealthy_worker_after
      // Don't need to add FLAGS_worker_check_interval because a worker
      // always takes at least that long to go from UNHEALTHY to MUST_DIE.
    ) {
      return std::make_pair(State::MUST_DIE, disallowed);
    }
    return std::make_pair(new_state, disallowed);
  }