in atari/atari_env.h [83:110]
// Starts a fresh episode and returns the first input dictionary.
//
// Steps, in order:
//   1. reset the emulator (ale_), the feature state (state_) and the step
//      counter (numSteps_);
//   2. call pressStartKey();
//   3. draw a count from noOpStartSampler_ and apply that many sampled
//      actions through aleStep(), re-sampling until the drawn action's
//      entry in legalActionMask_ equals 1;
//   4. copy the current screen into the state's observation buffer and
//      compute the feature tensor from it.
//
// Returns a dict with keys "s" (computed feature), "eps" (exploreEps_) and
// "legal_move" (legalActionMask_).
//
// NOTE(review): the re-sampling loop only ends once an action whose mask
// entry is 1 is drawn -- presumably the mask always has at least one such
// entry; confirm against where legalActionMask_ is built.
rela::TensorDict reset() final {
  // bring emulator, feature state and step counter back to their initial values
  ale_->reset_game();
  state_->reset();
  numSteps_ = 0;

  // press start key if needed
  pressStartKey();

  // The count is drawn before any action so the order of rng_ draws is fixed.
  const int numWarmupSteps = noOpStartSampler_(rng_);
  for (int remaining = numWarmupSteps; remaining > 0; --remaining) {
    // keep drawing until the sampled action is flagged with 1 in the mask
    int action = 0;
    do {
      action = (*actionSampler_)(rng_);
    } while (legalActionMask_[action].item<float>() != 1.0f);
    aleStep(action);
  }

  // grab the current screen and turn it into the first observation
  ale_->getScreenRGB(state_->getObservationBuffer());
  const torch::Tensor firstObs = state_->computeFeature();

  return {
      {"s", firstObs},
      {"eps", exploreEps_},
      {"legal_move", legalActionMask_},
  };
}