in torchbeast/core/environment.py [0:0]
def initial(self):
    """Start a new episode and return the first step's observation dict.

    Resets ``self.episode_return`` and ``self.episode_step`` to zero
    tensors, calls ``self.gym_env.reset()`` and passes the result through
    ``_format_frame``.

    Returns:
        dict with keys ``frame``, ``reward``, ``done``, ``episode_return``,
        ``episode_step`` and ``last_action``. Every entry except ``frame``
        is a ``(1, 1)`` tensor; ``episode_return`` and ``episode_step`` are
        the same tensor objects stored on ``self``.
    """
    # Per-episode accumulators live on the instance and start from zero.
    self.episode_step = torch.zeros(1, 1, dtype=torch.int32)
    self.episode_return = torch.zeros(1, 1)

    # The very first step is flagged as done (all-ones bool tensor).
    done_flag = torch.ones(1, 1, dtype=torch.bool)
    zero_reward = torch.zeros(1, 1)
    # This supports only single-tensor actions ATM.
    no_action = torch.zeros(1, 1, dtype=torch.int64)

    return {
        "frame": _format_frame(self.gym_env.reset()),
        "reward": zero_reward,
        "done": done_flag,
        "episode_return": self.episode_return,
        "episode_step": self.episode_step,
        "last_action": no_action,
    }