def step()

in mae_envs/envs/box_locking.py [0:0]


    def step(self, action):
        """
        Step the wrapped environment and add the task rewards on top of its reward.

        Args:
            action: mapping passed through to ``self.env.step``. For the
                'order' task the entry under ``self.act_key`` may be
                modified in place (see below) before it is forwarded.

        Returns:
            The ``(obs, rew, done, info)`` tuple produced by
            ``self.env.step``, with ``rew`` increased by the lock reward,
            the shaped reward and, when the task is complete, the success
            reward.

        Side effects:
            Refreshes ``self.objs_locked``, ``self.unlocked_objs``,
            ``self.next_obj``, ``self.next_obj_dist`` and
            ``self.spawn_pos_dist`` from the new observation.
        """
        # 'order' task: any locked object may be unlocked, but an object may
        # only be locked once all previous ones are locked. Enforced by zeroing
        # the action columns of every unlocked object except the first one.
        if self.task == 'order' and len(self.unlocked_objs) > 1:
            not_yet_allowed = self.unlocked_objs[1:]
            action[self.act_key][:, not_yet_allowed] = 0

        obs, rew, done, info = self.env.step(action)

        # The lock reward needs the previous lock state, so it is computed
        # before self.objs_locked is overwritten.
        locked_now = obs[self.lock_key].flatten().astype(np.int8)
        rew += self._get_lock_reward(locked_now, old_objs_locked=self.objs_locked)
        self.objs_locked = locked_now
        self.unlocked_objs = [idx for idx in self.obj_order if locked_now[idx] == 0]

        # The shaped reward is evaluated while the cached next-object and
        # spawn-distance attributes still hold the previous step's values.
        upcoming_obj, upcoming_obj_dist = self._get_next_obj(obs)
        agent_xy = obs[self.agent_key][:, :2]
        dist_to_spawn = np.linalg.norm(agent_xy - self.spawn_pos)
        rew += self._get_shaped_reward(upcoming_obj, upcoming_obj_dist, dist_to_spawn)

        self.next_obj = upcoming_obj
        self.next_obj_dist = upcoming_obj_dist
        self.spawn_pos_dist = dist_to_spawn

        # Success bonus: nothing left unlocked and, if a return is required,
        # the distance to the spawn position is within the threshold.
        if not self.unlocked_objs and (
                not self.need_return or dist_to_spawn <= self.return_threshold):
            rew += self.success_reward

        return obs, rew, done, info