in rlcc/thread_loop.h [25:93]
virtual void mainLoop() override {
while (!terminated()) {
// go over each envs in sequential order
// call in seperate for-loops to maximize parallization
for (size_t i = 0; i < envs_.size(); ++i) {
if (done_[i] == 1) {
continue;
}
auto& actors = actors_[i];
if (envs_[i]->terminated()) {
// we only run 1 game for evaluation
if (eval_) {
++done_[i];
if (done_[i] == 1) {
numDone_ += 1;
if (numDone_ == (int)envs_.size()) {
return;
}
}
}
envs_[i]->reset();
for (size_t j = 0; j < actors.size(); ++j) {
actors[j]->reset(*envs_[i]);
}
}
for (size_t j = 0; j < actors.size(); ++j) {
actors[j]->observeBeforeAct(*envs_[i]);
}
}
for (size_t i = 0; i < envs_.size(); ++i) {
if (done_[i] == 1) {
continue;
}
auto& actors = actors_[i];
int curPlayer = envs_[i]->getCurrentPlayer();
for (size_t j = 0; j < actors.size(); ++j) {
actors[j]->act(*envs_[i], curPlayer);
}
}
for (size_t i = 0; i < envs_.size(); ++i) {
if (done_[i] == 1) {
continue;
}
auto& actors = actors_[i];
for (size_t j = 0; j < actors.size(); ++j) {
actors[j]->fictAct(*envs_[i]);
}
}
for (size_t i = 0; i < envs_.size(); ++i) {
if (done_[i] == 1) {
continue;
}
auto& actors = actors_[i];
for (size_t j = 0; j < actors.size(); ++j) {
actors[j]->observeAfterAct(*envs_[i]);
}
}
}
}