in gym/gym/envs/algorithmic/algorithmic_env.py [0:0]
def _step(self, action):
    """Apply one action to the environment and score it.

    ``action`` is unpacked into three parts: the first is handed to
    ``self._move``, the second is a write flag (a write happens when it
    equals 1), and the third is the symbol compared against
    ``self.target`` at the current write-head position.

    Rewards assigned here:
      *  1.0  when a written symbol matches the target,
      * -0.5  when a written symbol is wrong (episode ends),
      * -1.0  when ``self.time`` exceeds ``self.time_limit`` (episode ends;
              this replaces any reward earned earlier in the same step),
      *  0.0  otherwise.

    Returns:
        A tuple ``(observation, reward, done, info)``; ``info`` is always an
        empty dict.
    """
    assert self.action_space.contains(action)
    self.last_action = action
    move_choice, write_flag, guess = action
    reward, done = 0.0, False
    self.time += 1
    assert self.write_head_position >= 0

    if write_flag == 1:
        try:
            is_match = guess == self.target[self.write_head_position]
        except IndexError:
            # The write head has run past the target, which only happens
            # when the caller keeps stepping after the episode finished.
            logger.warn("It looks like you're calling step() even though this "+
                        "environment has already returned done=True. You should always call "+
                        "reset() once you receive done=True. Any further steps are undefined "+
                        "behaviour.")
            is_match = False

        # A wrong symbol terminates the episode right away with a penalty.
        reward, done = (1.0, False) if is_match else (-0.5, True)

        self.write_head_position += 1
        # Having written as many symbols as the target holds ends the episode.
        if self.write_head_position >= len(self.target):
            done = True

    self._move(move_choice)

    # Running out of time overrides whatever reward was earned this step.
    if self.time > self.time_limit:
        reward, done = -1.0, True

    observation = self._get_obs()
    self.last_reward = reward
    self.episode_total_reward += reward
    return (observation, reward, done, {})