in rela/thread_loop.h [74:105]
virtual void mainLoop() final {
TensorDict obs = {};
torch::Tensor r;
torch::Tensor t;
while (!terminated()) {
obs = env_->reset(obs);
while (!env_->anyTerminated()) {
if (terminated()) {
break;
}
if (paused()) {
waitUntilResume();
}
auto action = actor_->act(obs);
std::tie(obs, r, t) = env_->step(action);
if (eval_) {
continue;
}
actor_->setRewardAndTerminal(r, t);
actor_->postStep();
}
// eval only runs for one game
if (eval_) {
break;
}
}
}