gym-compete/gym_compete/new_envs/agents/humanoid_kicker.py (59 lines of code) (raw):
import os

import numpy as np
import six
from gym.spaces import Box

from .agent import Agent
from .humanoid import Humanoid
def mass_center(mass, xpos):
    """Return the mass-weighted mean of *xpos* (center of mass).

    Sums ``mass * xpos`` over the first axis and normalizes by total mass.
    """
    total_mass = np.sum(mass)
    weighted = np.sum(mass * xpos, axis=0)
    return weighted / total_mass
class HumanoidKicker(Humanoid):
    """Humanoid agent augmented with ball-related observations and rewards.

    Extends :class:`Humanoid` with access to a free-floating ``ball`` joint in
    the shared environment, a goal target on the x-axis, and a reward term
    penalizing the agent-to-ball distance.
    """

    def __init__(self, agent_id, xml_path=None):
        if xml_path is None:
            xml_path = os.path.join(os.path.dirname(__file__), "assets", "humanoid_body.xml")
        super(HumanoidKicker, self).__init__(agent_id, xml_path)
        self.team = 'walker'
        # Goal x-coordinate: agent 0 attacks +4, any other agent attacks -4.
        self.TARGET = 4 if agent_id == 0 else -4
        # Half-width of the goal mouth along y (used in observations).
        self.TARGET_Y = 3

    def set_env(self, env):
        """Bind the shared env and cache the ball joint's id and qpos size."""
        # mujoco-py stores joint names as bytes; six.b handles py2/py3.
        self.ball_jnt_id = env.model.joint_names.index(six.b('ball'))
        # Number of qpos entries for this joint type (free joint -> 7).
        self.ball_jnt_nqpos = Agent.JNT_NPOS[int(env.model.jnt_type[self.ball_jnt_id])]
        super(HumanoidKicker, self).set_env(env)

    def get_ball_qpos(self):
        """Return the ball joint's qpos slice (position + orientation)."""
        start_idx = int(self.env.model.jnt_qposadr[self.ball_jnt_id])
        return self.env.model.data.qpos[start_idx:start_idx+self.ball_jnt_nqpos]

    def get_ball_qvel(self):
        """Return the ball joint's qvel slice."""
        start_idx = int(self.env.model.jnt_dofadr[self.ball_jnt_id])
        # ball has 6 components: 3d translation, 3d rotational
        return self.env.model.data.qvel[start_idx:start_idx+6]

    def set_goal(self, goal):
        """Record the ball's initial x as GOAL and set the target x-coordinate."""
        ball_ini_xyz = self.get_ball_qpos()
        # float(...) replaces np.asscalar, which was removed in NumPy 1.23.
        self.GOAL = float(ball_ini_xyz[0])
        self.TARGET = goal
        self.move_left = False
        if self.get_qpos()[0] - self.GOAL > 0:
            # Agent starts to the right of the ball, so it must move left.
            self.move_left = True

    def after_step(self, action):
        """Clip the action, run the base step, and add a ball-distance penalty.

        Returns ``(reward, done, rinfo)`` where reward is the base locomotion
        reward minus the Euclidean agent-to-ball distance.
        """
        action = np.clip(action, self.action_space.low, self.action_space.high)
        _, done, rinfo = super(HumanoidKicker, self).after_step(action)
        ball_xy = self.get_ball_qpos()[:2]
        my_xy = self.get_qpos()[:2]
        ball_dist = np.sqrt(np.sum((my_xy - ball_xy)**2))
        # float(...) replaces np.asscalar, which was removed in NumPy 1.23.
        rinfo['reward_goal_dist'] = float(ball_dist)
        reward = rinfo['reward_forward'] - rinfo['reward_ctrl'] - rinfo['reward_contact'] + rinfo['reward_survive'] - rinfo['reward_goal_dist']
        rinfo['reward_move'] = reward
        assert np.isfinite(reward), (rinfo, action)
        return reward, done, rinfo

    def _get_obs(self):
        """Concatenate the relative body observation with ball features.

        Appended features: ball position relative to the agent (x, y, absolute
        z), ball-to-target x distance, and distances to both goal-post y lines.
        """
        state = super(HumanoidKicker, self)._get_obs_relative()
        ball_xyz = self.get_ball_qpos()[:3]
        relative_xy = ball_xyz[:2] - self.get_qpos()[:2]
        relative_xyz = np.concatenate([relative_xy.flat, ball_xyz[2].flat])
        ball_goal_dist = self.TARGET - ball_xyz[0]
        ball_qvel = self.get_ball_qvel()[:3]
        ball_goal_y_dist1 = np.asarray(self.TARGET_Y - ball_xyz[1])
        ball_goal_y_dist2 = np.asarray(-self.TARGET_Y - ball_xyz[1])
        obs = np.concatenate([state.flat, relative_xyz.flat, np.asarray(ball_goal_dist).flat, ball_goal_y_dist1.flat, ball_goal_y_dist2.flat])
        assert np.isfinite(obs).all(), "Humanoid Kicker observation is not finite!!"
        return obs

    def reached_goal(self):
        """Never signals termination; episode end is decided by the env."""
        return False

    def reset_agent(self):
        """No per-agent reset state beyond what the base env handles."""
        pass