In env_utils.py:
def initial(self):
    # Buffers for the first timestep: zero reward/return/step counters and a
    # "done" flag set to 1 so the learner treats this as an episode boundary.
    initial_reward = torch.zeros(1, 1)
    self.episode_return = torch.zeros(1, 1)
    self.episode_step = torch.zeros(1, 1, dtype=torch.int32)
    self.episode_win = torch.zeros(1, 1, dtype=torch.int32)
    initial_done = torch.ones(1, 1, dtype=torch.uint8)

    # Optionally reseed the environment so every reset starts from the same layout.
    if self.fix_seed:
        self.gym_env.seed(seed=self.env_seed)
    initial_frame = _format_observation(self.gym_env.reset())

    # Encode the object the agent is carrying as (color, type) indices;
    # with empty hands, fall back to the MiniGrid indices for grey (5) and empty (1).
    if self.gym_env.carrying:
        carried_col = torch.LongTensor([[COLOR_TO_IDX[self.gym_env.carrying.color]]])
        carried_obj = torch.LongTensor([[OBJECT_TO_IDX[self.gym_env.carrying.type]]])
    else:
        carried_col, carried_obj = torch.LongTensor([[5]]), torch.LongTensor([[1]])

    return dict(
        frame=initial_frame,
        reward=initial_reward,
        done=initial_done,
        episode_return=self.episode_return,
        episode_step=self.episode_step,
        episode_win=self.episode_win,
        carried_col=carried_col,
        carried_obj=carried_obj)
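
The helper _format_observation is referenced above but not included in this excerpt. A minimal sketch of what it would need to do, assuming the wrapped environment already returns a plain array (e.g. the MiniGrid image) and the usual torchbeast convention of prepending time and batch dimensions of size 1:

def _format_observation(obs):
    # Convert the raw observation to a tensor and add leading (T=1, B=1) dims,
    # matching the (1, 1, ...) shapes of the other fields returned by initial().
    obs = torch.tensor(obs)
    return obs.view((1, 1) + obs.shape)

With this convention, every field in the dict returned by initial() is a (T=1, B=1)-shaped tensor, which is the shape the actor/rollout loop expects for each timestep.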