in rela/thread_loop.h [74:105]
  virtual void mainLoop() final {
    TensorDict obs = {};
    torch::Tensor r;
    torch::Tensor t;
    while (!terminated()) {
      obs = env_->reset(obs);
      while (!env_->anyTerminated()) {
        if (terminated()) {
          break;
        }
        if (paused()) {
          waitUntilResume();
        }
        auto action = actor_->act(obs);
        std::tie(obs, r, t) = env_->step(action);
        if (eval_) {
          continue;
        }
        actor_->setRewardAndTerminal(r, t);
        actor_->postStep();
      }
      // eval only runs for one game
      if (eval_) {
        break;
      }
    }
  }