# gym-compete/gym_compete/new_envs/agents/humanoid.py
from .agent import Agent
from gym.spaces import Box
import numpy as np
import os
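
# mass_center expects the MuJoCo per-body masses (presumably reshaped to
# (nbody, 1) so the broadcast works) and the body-frame centre-of-mass
# positions xipos (nbody, 3); the mass-weighted mean gives the whole-body
# centre of mass, and [0] keeps only its x-coordinate, which is what the
# forward reward tracks.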
def mass_center(mass, xpos):
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]


class Humanoid(Agent):

    def __init__(self, agent_id, xml_path=None, **kwargs):
        if xml_path is None:
            xml_path = os.path.join(os.path.dirname(__file__), "assets", "humanoid_body.xml")
        super(Humanoid, self).__init__(agent_id, xml_path, **kwargs)
        self.team = 'walker'

    def set_goal(self, goal):
        self.GOAL = goal
        self.move_left = False
        if self.get_qpos()[0] > 0:
            self.move_left = True
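
    # Dense reward, computed in after_step: forward progress of the centre of
    # mass (scaled by 0.25 / timestep and sign-flipped when the agent should
    # move left), minus control and contact costs, plus a +/-5 survive bonus
    # and a negative distance-to-goal term.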

    def before_step(self):
        self._pos_before = mass_center(self.get_body_mass(), self.get_xipos())

    def after_step(self, action):
        pos_after = mass_center(self.get_body_mass(), self.get_xipos())
        forward_reward = 0.25 * (pos_after - self._pos_before) / self.env.model.opt.timestep
        if self.move_left:
            forward_reward *= -1
        ctrl_cost = .1 * np.square(action).sum()
        cfrc_ext = self.get_cfrc_ext()
        contact_cost = .5e-6 * np.square(cfrc_ext).sum()
        contact_cost = min(contact_cost, 10)
        qpos = self.get_qpos()
        agent_standing = qpos[2] >= 1.0
        survive = 5.0 if agent_standing else -5.
        reward = forward_reward - ctrl_cost - contact_cost + survive
        reward_goal = - np.abs(float(qpos[0]) - self.GOAL)
        reward += reward_goal
        reward_info = dict()
        reward_info['reward_forward'] = forward_reward
        reward_info['reward_ctrl'] = ctrl_cost
        reward_info['reward_contact'] = contact_cost
        reward_info['reward_survive'] = survive
        if self.team == 'walker':
            reward_info['reward_goal_dist'] = reward_goal
        reward_info['reward_move'] = reward
        # done = not agent_standing
        done = qpos[2] < 0.8
        return reward, done, reward_info
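
    # Observations concatenate, in order: this agent's joint positions and
    # velocities, per-body inertia (cinert) and velocity (cvel) terms, actuator
    # forces (qfrc_actuator), contact forces clipped to [-1, 1] (cfrc_ext), and
    # the opponent's joint positions (absolute below, relative in
    # _get_obs_relative).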

    def _get_obs(self):
        '''
        Return agent's observations
        '''
        my_pos = self.get_qpos()
        other_pos = self.get_other_qpos()
        vel = self.get_qvel()
        cfrc_ext = np.clip(self.get_cfrc_ext(), -1, 1)
        cvel = self.get_cvel()
        cinert = self.get_cinert()
        qfrc_actuator = self.get_qfrc_actuator()
        obs = np.concatenate(
            [my_pos.flat, vel.flat,
             cinert.flat, cvel.flat,
             qfrc_actuator.flat, cfrc_ext.flat,
             other_pos.flat]
        )
        assert np.isfinite(obs).all(), "Humanoid observation is not finite!!"
        return obs

    def _get_obs_relative(self):
        '''
        Return agent's observations, positions are relative
        '''
        qpos = self.get_qpos()
        my_pos = qpos[2:]
        other_agents_qpos = self.get_other_agent_qpos()
        all_other_qpos = []
        for i in range(self.n_agents):
            if i == self.id:
                continue
            other_qpos = other_agents_qpos[i]
            other_relative_xy = other_qpos[:2] - qpos[:2]
            other_qpos = np.concatenate([other_relative_xy.flat, other_qpos[2:].flat], axis=0)
            all_other_qpos.append(other_qpos)
        all_other_qpos = np.concatenate(all_other_qpos)
        vel = self.get_qvel()
        cfrc_ext = np.clip(self.get_cfrc_ext(), -1, 1)
        cvel = self.get_cvel()
        cinert = self.get_cinert()
        qfrc_actuator = self.get_qfrc_actuator()
        obs = np.concatenate(
            [my_pos.flat, vel.flat,
             cinert.flat, cvel.flat,
             qfrc_actuator.flat, cfrc_ext.flat,
             all_other_qpos.flat]
        )
        assert np.isfinite(obs).all(), "Humanoid observation is not finite!!"
        return obs
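
    # The observation space is an unbounded Box whose dimensionality is taken
    # from a sample observation returned by _get_obs.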

    def set_observation_space(self):
        obs = self._get_obs()
        self.obs_dim = obs.size
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = Box(low, high)

    def reached_goal(self):
        xpos = self.get_body_com('torso')[0]
        if self.GOAL > 0 and xpos > self.GOAL:
            return True
        elif self.GOAL < 0 and xpos < self.GOAL:
            return True
        return False
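
    # On reset, if the agent spawns on the same side of the origin as its
    # current goal, the goal is flipped to the opposite side so the agent
    # always has to cross the arena; move_left records which x-direction
    # counts as forward progress.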

    def reset_agent(self):
        xpos = self.get_qpos()[0]
        if xpos * self.GOAL > 0:
            self.set_goal(-self.GOAL)
        if xpos > 0:
            self.move_left = True
        else:
            self.move_left = False
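

if __name__ == '__main__':
    # Illustrative sketch only (not used by the environment): reproduces the
    # dense-reward arithmetic from after_step with made-up numbers. The 0.25
    # scaling, cost weights and survive bonus mirror the code above; dt, the
    # centre-of-mass positions and the action size are assumptions. Because of
    # the relative import at the top, this only runs when executed as a module
    # within the package.
    dt = 0.015                                    # stand-in for env.model.opt.timestep
    pos_before, pos_after = 0.30, 0.33            # centre-of-mass x before / after the step
    action = np.zeros(17)                         # a do-nothing action of assumed size
    forward_reward = 0.25 * (pos_after - pos_before) / dt            # 0.5
    ctrl_cost = .1 * np.square(action).sum()                         # 0.0
    contact_cost = min(.5e-6 * np.square(np.zeros(6)).sum(), 10)     # 0.0
    survive = 5.0                                                    # agent is standing
    reward = forward_reward - ctrl_cost - contact_cost + survive
    print('example per-step reward (before the goal-distance term):', reward)  # 5.5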