in randomized_uncertain_social_preferences/rusp/env_oasis.py [0:0]
def step(self, action):
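    """Step the wrapped environment while handling agent death.

    Respawns agents whose health has dropped to zero just outside the floor,
    zeroes out the actions of dead (or still-frozen) agents, updates healths
    from the wrapped env's 'health_delta', applies death and living rewards,
    and logs death statistics when the episode ends.
    """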
    self.is_dead = np.logical_or(self.healths <= 0, self.time_since_death < self.steps_freeze_on_death)

    # If an agent just died (health <= 0), respawn it just outside the floor and reset its state
    # (see the respawn sketch after this method).
    for i in np.where(self.healths <= 0)[0]:
        x_ind = self.unwrapped.sim.model.joint_name2id(f'agent{i}:tx')
        y_ind = self.unwrapped.sim.model.joint_name2id(f'agent{i}:ty')
        fs = self.unwrapped.floor_size
        self.unwrapped.sim.data.qpos[x_ind] = np.random.choice([np.random.uniform(-1, 0), np.random.uniform(fs, fs + 1)])
        self.unwrapped.sim.data.qpos[y_ind] = np.random.choice([np.random.uniform(-1, 0), np.random.uniform(fs, fs + 1)])
        self.healths[i] = self.max_health
        self.time_since_death[i] = 0
        self.agent_died_count[i] += 1
        self.unwrapped.sim.forward()  # Forward the sim so the new position takes effect immediately

    # Zero out the actions of all dead (or still-frozen) agents (see the masking sketch after this method)
    if np.any(self.is_dead):
        for ac_key, ac in action.items():
            ac[self.is_dead] = self.zero_action[ac_key][self.is_dead]

    obs, rew, done, info = self.env.step(action)

    # Update healths: only alive agents gain or lose health, capped at max_health
    self.healths[~self.is_dead] += info['health_delta'][~self.is_dead]
    self.healths = np.minimum(self.healths, self.max_health)
    self.time_since_death += 1

    # Reward adjustments for dying this step and for staying alive
    rew[self.healths <= 0] += self.death_rew
    rew[~self.is_dead] += self.life_rew

    # Death statistics reported at the end of the episode
    if done:
        info['n_unique_died'] = np.sum(self.agent_died_count > 0)
        info['only_one_died'] = (np.sum(self.agent_died_count > 0) == 1)
        info['n_died'] = np.sum(self.agent_died_count)
        info['n_died_min'] = np.min(self.agent_died_count)
        info['n_died_max'] = np.max(self.agent_died_count)
        info['n_died_std'] = np.std(self.agent_died_count)
        info['n_died_total_minus_max'] = np.sum(self.agent_died_count) - np.max(self.agent_died_count)

    return self.observation(obs), rew, done, info
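
The respawn draw above is the least obvious piece: each coordinate is independently placed either just below 0 or just above floor_size, so a respawned agent reappears in one of the four 1-unit squares outside the floor's corners. A minimal standalone sketch of that sampling, assuming a floor size of 4.0 purely for illustration (sample_respawn_coord is a hypothetical helper, not part of env_oasis.py):

import numpy as np

def sample_respawn_coord(fs):
    # Pre-sample one candidate on each side of the floor, then pick one of the
    # two at random -- the same np.random.choice([...]) pattern used in step().
    return np.random.choice([np.random.uniform(-1, 0), np.random.uniform(fs, fs + 1)])

fs = 4.0  # assumed floor size, for illustration only
x, y = sample_respawn_coord(fs), sample_respawn_coord(fs)
print(x, y)  # e.g. -0.4 4.6: each coordinate falls outside [0, fs] on one side or the other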
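
The action-masking step relies on in-place boolean indexing over a dict of per-agent action arrays. A self-contained sketch of the same pattern, where the key name 'action_movement', the array shapes, and the zero_action contents are assumptions for illustration rather than the environment's actual action space:

import numpy as np

n_agents = 4
action = {'action_movement': np.random.randint(-1, 2, size=(n_agents, 3))}
zero_action = {'action_movement': np.zeros((n_agents, 3), dtype=int)}
is_dead = np.array([False, True, False, True])

for ac_key, ac in action.items():
    # Boolean-index both sides so only the dead agents' rows are overwritten, in place.
    ac[is_dead] = zero_action[ac_key][is_dead]

print(action['action_movement'][is_dead])  # the dead agents' rows are now all zeros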