in core/environment.py [0:0]
def step(self, action):
    """Advance the wrapped gym environment by one step and package the result.

    Calls ``self.gym_env.step`` with ``action.item()``, updates the running
    per-episode step count and return, and, if the episode has finished,
    resets the environment and both counters.

    Args:
        action: Object exposing ``.item()`` that yields the action passed to
            the gym environment (presumably a single-element tensor --
            confirm against the caller).

    Returns:
        dict with the keys:
            ``reward``: this step's reward as a tensor viewed as (1, 1).
            ``done``: this step's termination flag as a tensor viewed as
                (1, 1).
            ``episode_return``: accumulated return including this step
                (the pre-reset value when the episode just ended).
            ``episode_step``: step count including this step (the pre-reset
                value when the episode just ended).
        plus the observation: if the environment returns a dict, every entry
        is added under its own key with two leading singleton dimensions;
        otherwise the output of ``_format_frame`` is stored under ``frame``.
        When the episode ended, the observation is the one returned by
        ``reset()``, not the terminal one.
    """
    frame, reward, done, _ = self.gym_env.step(action.item())

    # Rebind instead of updating in place: the values handed back to the
    # caller must not be the same objects the next call to step() modifies,
    # otherwise previously returned counters would change retroactively.
    self.episode_step = self.episode_step + 1
    self.episode_return = self.episode_return + reward
    episode_step = self.episode_step
    episode_return = self.episode_return

    if done:
        # Start the next episode right away; the counters captured above
        # still describe the episode that just finished.
        frame = self.gym_env.reset()
        self.episode_return = torch.zeros(1, 1)
        self.episode_step = torch.zeros(1, 1, dtype=torch.int32)

    out = dict(
        reward=torch.tensor(reward).view(1, 1),
        done=torch.tensor(done).view(1, 1),
        episode_return=episode_return,
        episode_step=episode_step,
    )
    if isinstance(frame, dict):
        # Assumes dict observations hold tensors (uses .view / .shape);
        # each one gets two leading singleton dimensions.
        out.update({k: v.view((1, 1) + v.shape) for k, v in frame.items()})
    else:
        out['frame'] = _format_frame(frame)
    return out