gym-compete/gym_compete/new_envs/agents/humanoid_fighter.py (40 lines of code) (raw):

from .agent import Agent from .humanoid import Humanoid from gym.spaces import Box import numpy as np import six def mass_center(mass, xpos): return (np.sum(mass * xpos, 0) / np.sum(mass))[0] class HumanoidFighter(Humanoid): def __init__(self, agent_id, xml_path=None, team='blocker', **kwargs): super(HumanoidFighter, self).__init__(agent_id, xml_path, **kwargs) self.team = team def before_step(self): pass def set_env(self, env): super(HumanoidFighter, self).set_env(env) self.arena_id = self.env.model.geom_names.index(six.b('arena')) self.arena_height = self.env.model.geom_size[self.arena_id][1] * 2 def after_step(self, action): action = np.clip(action, self.action_space.low, self.action_space.high) ctrl_cost = .1 * np.square(action).sum() cfrc_ext = self.get_cfrc_ext() contact_cost = .5e-6 * np.square(cfrc_ext).sum() contact_cost = min(contact_cost, 10) qpos = self.get_qpos() center_reward = - np.sqrt(np.sum((0. - qpos[:2])**2)) agent_standing = qpos[2] - self.arena_height >= 1.0 survive = 5.0 if agent_standing else -5. reward = center_reward - ctrl_cost - contact_cost + survive # reward = survive reward_info = dict() # reward_info['reward_forward'] = forward_reward reward_info['reward_center'] = center_reward reward_info['reward_ctrl'] = ctrl_cost reward_info['reward_contact'] = contact_cost reward_info['reward_survive'] = survive reward_info['reward_move'] = reward done = bool(qpos[2] - self.arena_height <= 0.5) return reward, done, reward_info def _get_obs(self): ''' Return agent's observations ''' qpos = self.get_qpos() my_pos = qpos other_qpos = self.get_other_qpos() other_pos = other_qpos other_relative_xy = other_qpos[:2] - qpos[:2] vel = self.get_qvel() cfrc_ext = np.clip(self.get_cfrc_ext(), -1, 1) cvel = self.get_cvel() cinert = self.get_cinert() qfrc_actuator = self.get_qfrc_actuator() torso_xmat = self.get_torso_xmat() obs = np.concatenate( [my_pos.flat, vel.flat, cinert.flat, cvel.flat, qfrc_actuator.flat, cfrc_ext.flat, other_pos.flat, other_relative_xy.flat, torso_xmat.flat] ) assert np.isfinite(obs).all(), "Humanoid observation is not finite!!" return obs def reached_goal(self): return False