in atari/atari_env.h [177:193]
/// Advances the emulator by one agent step: the action selected by `actIdx`
/// is passed to `ale_->act` once per loop iteration, `frameSkip_` times in
/// total.
///
/// @param actIdx Index into `legalAction_` of the action to repeat. It must be
///               a valid index whose entry in `legalActionMask_` equals 1
///               (both enforced by assert only).
/// @return Sum of the rewards returned by every `ale_->act` call made during
///         this step.
///
/// Preconditions (assert only): the game is not over on entry. Game-over is
/// not re-checked between the repeated frames.
/// Side effects: on the last iteration the current screen is written into the
/// buffer returned by `state_->getPrevObservationBuffer()` before acting, and
/// `numSteps_` is incremented once per call.
float aleStep(int actIdx) {
  assert(!ale_->game_over());
  // The action is the same for every repeated frame, so validate it once up
  // front rather than repeating the mask lookup on each iteration.
  assert(actIdx >= 0 && actIdx < static_cast<int>(legalAction_.size()));
  assert(legalActionMask_[actIdx].item<float>() == 1.0f);

  float reward = 0;
  for (int i = 0; i < frameSkip_; i++) {
    // need previous observation buffer for max: capture the screen as it is
    // right before the final repeated frame is emulated
    if (i == frameSkip_ - 1) {
      ale_->getScreenRGB(state_->getPrevObservationBuffer());
    }
    reward += ale_->act(legalAction_[actIdx]);
  }
  numSteps_++;
  return reward;
}