in mae_envs/envs/hide_and_seek.py [0:0]
def step(self, action):
    """Step the wrapped environment and track box/ramp/food statistics.

    Two snapshots are taken per episode:

    * a preparation-phase snapshot, the first time ``obs['prep_obs'][0, 0]``
      equals ``1.0`` while ``self.in_prep_phase`` is set (or, if that never
      happens, on the step where the episode ends);
    * an end-of-episode snapshot, on the step where ``done`` is truthy.

    Both snapshots are written into ``info`` only on the ``done`` step.

    Args:
        action: Passed through unchanged to ``self.env.step``.

    Returns:
        The ``(obs, rew, done, info)`` tuple from the wrapped environment.
        When ``done`` is truthy, ``info`` is updated in place with whichever
        of these keys apply to the enabled object types:
        ``max_box_move_prep``, ``max_box_move``, ``num_box_lock_prep``,
        ``num_box_lock``, ``max_ramp_move_prep``, ``max_ramp_move``,
        ``num_ramp_lock_prep``, ``num_ramp_lock``, ``food_eaten``,
        ``food_eaten_prep``.

    Note:
        Relies on ``self.in_prep_phase``, ``self.box_pos_start``,
        ``self.ramp_pos_start`` and ``self.total_food_eaten`` having been
        initialised before the first call (presumably by ``reset`` -- not
        visible from this method; confirm against the rest of the class).
    """
    obs, rew, done, info = self.env.step(action)

    if self.n_food > 0:
        # Accumulate food eaten on this step into the running episode total.
        self.total_food_eaten += np.sum(obs['food_eat'])

    # `or done` guards the case where the episode terminates before the
    # preparation phase is observed to finish: without it the `*_prep`
    # attributes read below would be undefined (AttributeError) or left over
    # from a previous episode.
    if self.in_prep_phase and (obs['prep_obs'][0, 0] == 1.0 or done):
        # Track statistics at end of preparation phase
        self.in_prep_phase = False

        if self.n_boxes > 0:
            # Largest displacement of any box from its starting position.
            self.max_box_move_prep = np.max(
                np.linalg.norm(obs['box_pos'] - self.box_pos_start, axis=-1))
            self.num_box_lock_prep = np.sum(obs['obj_lock'])
        if self.n_ramps > 0:
            self.max_ramp_move_prep = np.max(
                np.linalg.norm(obs['ramp_pos'] - self.ramp_pos_start, axis=-1))
            # Ramp lock observations are optional.
            if 'ramp_obj_lock' in obs:
                self.num_ramp_lock_prep = np.sum(obs['ramp_obj_lock'])
        if self.n_food > 0:
            self.total_food_eaten_prep = self.total_food_eaten

    if done:
        # Track statistics at end of episode
        if self.n_boxes > 0:
            self.max_box_move = np.max(
                np.linalg.norm(obs['box_pos'] - self.box_pos_start, axis=-1))
            self.num_box_lock = np.sum(obs['obj_lock'])
            info.update({
                'max_box_move_prep': self.max_box_move_prep,
                'max_box_move': self.max_box_move,
                'num_box_lock_prep': self.num_box_lock_prep,
                'num_box_lock': self.num_box_lock})

        if self.n_ramps > 0:
            self.max_ramp_move = np.max(
                np.linalg.norm(obs['ramp_pos'] - self.ramp_pos_start, axis=-1))
            info.update({
                'max_ramp_move_prep': self.max_ramp_move_prep,
                'max_ramp_move': self.max_ramp_move})
            if 'ramp_obj_lock' in obs:
                self.num_ramp_lock = np.sum(obs['ramp_obj_lock'])
                info.update({
                    'num_ramp_lock_prep': self.num_ramp_lock_prep,
                    'num_ramp_lock': self.num_ramp_lock})

        if self.n_food > 0:
            info.update({
                'food_eaten': self.total_food_eaten,
                'food_eaten_prep': self.total_food_eaten_prep})

    return obs, rew, done, info