def step()

in randomized_uncertain_social_preferences/rusp/env_oasis.py [0:0]


    def step(self, action):
        """Advance the wrapped env one step while tracking health and deaths.

        Agents whose health is at or below zero are respawned: each of their
        ``tx``/``ty`` joint positions is redrawn from the one-unit band just
        below 0 or just above ``floor_size``, their health is restored to
        ``max_health`` and their death timer restarts. Agents flagged dead
        (health depleted, or fewer than ``steps_freeze_on_death`` steps since
        their last death) get their actions replaced by ``zero_action``.

        Args:
            action: mapping from action key to an array indexable by the
                per-agent dead mask. Entries of dead agents are overwritten
                in place before being forwarded to the wrapped env.

        Returns:
            ``(observation, rew, done, info)``. ``rew`` has the death and
            living rewards added; when ``done`` is truthy, ``info`` also
            carries summary statistics of ``agent_died_count``.
        """
        depleted = self.healths <= 0
        frozen = self.time_since_death < self.steps_freeze_on_death
        self.is_dead = np.logical_or(depleted, frozen)

        # Respawn every agent that has run out of health.
        sim = self.unwrapped.sim
        for agent_idx in np.nonzero(depleted)[0]:
            joint_ids = (
                sim.model.joint_name2id(f'agent{agent_idx}:tx'),
                sim.model.joint_name2id(f'agent{agent_idx}:ty'),
            )
            floor = self.unwrapped.floor_size
            for joint_id in joint_ids:
                # Draw one candidate on each side of the floor, then pick one.
                below = np.random.uniform(-1, 0)
                above = np.random.uniform(floor, floor + 1)
                sim.data.qpos[joint_id] = np.random.choice([below, above])
            self.healths[agent_idx] = self.max_health
            self.time_since_death[agent_idx] = 0
            self.agent_died_count[agent_idx] += 1
        # Forward the sim so the new positions are reflected sooner.
        sim.forward()

        # Dead agents are not allowed to act: overwrite their actions.
        if self.is_dead.any():
            for key in action:
                action[key][self.is_dead] = self.zero_action[key][self.is_dead]

        obs, rew, done, info = self.env.step(action)

        # Only living agents gain or lose health; health is capped at the max.
        alive = ~self.is_dead
        self.healths[alive] += info['health_delta'][alive]
        self.healths = np.minimum(self.healths, self.max_health)
        self.time_since_death += 1

        # Penalty for agents whose health is now depleted, bonus for the living.
        rew[self.healths <= 0] += self.death_rew
        rew[alive] += self.life_rew

        # End-of-episode death statistics.
        if done:
            counts = self.agent_died_count
            ever_died = np.sum(counts > 0)
            total_deaths = np.sum(counts)
            most_deaths = np.max(counts)
            info.update({
                'n_unique_died': ever_died,
                'only_one_died': (ever_died == 1),
                'n_died': total_deaths,
                'n_died_min': np.min(counts),
                'n_died_max': most_deaths,
                'n_died_std': np.std(counts),
                'n_died_total_minus_max': total_deaths - most_deaths,
            })

        return self.observation(obs), rew, done, info