in env_utils.py [0:0]
# Assumes module-level imports: torch, plus COLOR_TO_IDX / OBJECT_TO_IDX
# from gym_minigrid.minigrid, and the _format_observation helper defined
# elsewhere in env_utils.py.
def step(self, action):
    frame, reward, done, _ = self.gym_env.step(action.item())

    # Accumulate per-episode statistics; keep references to the current
    # tensors so the caller still sees the finished episode's values after
    # a reset rebinds the attributes below.
    self.episode_step += 1
    episode_step = self.episode_step

    self.episode_return += reward
    episode_return = self.episode_return

    # A positive terminal reward marks the episode as a win.
    if done and reward > 0:
        self.episode_win[0][0] = 1
    else:
        self.episode_win[0][0] = 0
    episode_win = self.episode_win

    if done:
        # Optionally re-seed so every episode starts from the same layout.
        if self.fix_seed:
            self.gym_env.seed(seed=self.env_seed)
        frame = self.gym_env.reset()
        # Rebind (rather than zero in place) so the references captured
        # above keep the final statistics of the finished episode.
        self.episode_return = torch.zeros(1, 1)
        self.episode_step = torch.zeros(1, 1, dtype=torch.int32)
        self.episode_win = torch.zeros(1, 1, dtype=torch.int32)

    frame = _format_observation(frame)
    reward = torch.tensor(reward).view(1, 1)
    done = torch.tensor(done).view(1, 1)

    # Encode the object the agent is carrying (if any) as (color, type) indices.
    if self.gym_env.carrying:
        carried_col = torch.LongTensor([[COLOR_TO_IDX[self.gym_env.carrying.color]]])
        carried_obj = torch.LongTensor([[OBJECT_TO_IDX[self.gym_env.carrying.type]]])
    else:
        # Nothing carried: fall back to the 'grey' color and 'empty' object indices.
        carried_col = torch.LongTensor([[5]])
        carried_obj = torch.LongTensor([[1]])

    return dict(
        frame=frame,
        reward=reward,
        done=done,
        episode_return=episode_return,
        episode_step=episode_step,
        episode_win=episode_win,
        carried_col=carried_col,
        carried_obj=carried_obj,
    )
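
A minimal sketch of how the returned dictionary might be consumed in a rollout loop. It assumes env_utils.py defines a TorchBeast-style `Environment` wrapper exposing this `step()` together with an `initial()` method, and that its constructor takes the gym env plus `fix_seed`/`env_seed`; those names and the random-policy stand-in are assumptions, not part of the code above.

# Hypothetical usage of the step() output; names of the wrapper class and
# its constructor arguments are assumed, not confirmed by this excerpt.
import torch
import gym
import gym_minigrid  # noqa: F401  (registers the MiniGrid-* environments)

from env_utils import Environment  # assumed wrapper class defined in this file

env = Environment(gym.make("MiniGrid-Empty-8x8-v0"), fix_seed=True, env_seed=1)
obs = env.initial()
for _ in range(100):
    # Random policy stand-in; step() expects a (1, 1) action tensor.
    action = torch.randint(env.gym_env.action_space.n, (1, 1))
    obs = env.step(action)
    if obs["done"].item():
        print("episode_return:", obs["episode_return"].item(),
              "win:", obs["episode_win"].item())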