LogLines RemoteWorkerRunner::getJobLogs()

in bistro/runners/RemoteWorkerRunner.cpp [438:505]


/**
 * Fetches log lines of type `logtype` for the given jobs and nodes from the
 * remote workers.
 *
 * Every worker that is neither UNHEALTHY nor MUST_DIE gets queried.  Asking
 * all of them is wasteful, but it keeps lookups simple:
 *   - different iterations of one task may have logged on different workers;
 *   - the logs API allows multi-queries, some of which need every worker
 *     anyway.
 *
 * Workers that are skipped are reported by shard name in a human-readable
 * string ("unhealthy: a, b; lost: c"), which is handed to
 * getJobLogsThreadAndEventBaseSafe() together with the query arguments.
 *
 * Throws BistroException when there is no worker left to query.
 */
LogLines RemoteWorkerRunner::getJobLogs(
    const string& logtype,
    const vector<string>& jobs,
    const vector<string>& nodes,
    int64_t line_id,
    bool is_ascending,
    const string& regex_filter) const {

  std::vector<cpp2::ServiceAddress> queryable_addrs;
  std::vector<std::string> unhealthy_shards;
  std::vector<std::string> lost_shards;

  // Partition the worker pool by connection state while holding the lock.
  SYNCHRONIZED_CONST(workers_) {
    for (const auto& pool_entry : workers_.workerPool()) {
      const auto& conn = pool_entry.second;
      const auto& worker = conn->getBistroWorker();
      const auto worker_state = conn->getState();

      // Fetching from an unhealthy worker can be slow and hurts the user
      // experience, so such workers are only listed in the "transient"
      // error text instead of being contacted.
      if (worker_state == RemoteWorkerState::State::UNHEALTHY) {
        unhealthy_shards.push_back(*worker.shard_ref());
        continue;
      }
      if (worker_state == RemoteWorkerState::State::MUST_DIE) {
        lost_shards.push_back(*worker.shard_ref());
        continue;
      }
      queryable_addrs.push_back(*worker.addr_ref());
    }
  }

  // Describe, for the user, which workers' logs are being left out.
  std::vector<std::string> skipped_groups;
  if (!unhealthy_shards.empty()) {
    skipped_groups.push_back(
      "unhealthy: " + folly::join(", ", unhealthy_shards)
    );
  }
  if (!lost_shards.empty()) {
    skipped_groups.push_back("lost: " + folly::join(", ", lost_shards));
  }
  std::string unqueried_workers = folly::join("; ", skipped_groups);

  // With nobody to ask, fail right away with the most specific message.
  if (queryable_addrs.empty()) {
    if (unqueried_workers.empty()) {
      throw BistroException("No workers connected; cannot query logs.");
    }
    throw BistroException(
      "All workers are unhealthy; cannot query logs. Known workers: ",
      unqueried_workers
    );
  }

  return getJobLogsThreadAndEventBaseSafe(
    unqueried_workers,
    queryable_addrs,
    logtype,
    jobs,
    nodes,
    line_id,
    is_ascending,
    regex_filter,
    workerClientFn_
  );
}