gym-compete/gym_compete/new_envs/agents/ant.py (58 lines of code) (raw):

import os

import numpy as np
from gym.spaces import Box

from .agent import Agent


class Ant(Agent):
    """Ant agent that is rewarded for moving along the x axis toward a goal.

    The direction of travel (``move_left``) is derived from the sign of the
    agent's first ``qpos`` entry whenever the goal is set or the agent is
    reset.
    """

    def __init__(self, agent_id, xml_path=None):
        """Create the agent.

        Args:
            agent_id: Identifier forwarded unchanged to ``Agent.__init__``.
            xml_path: Path to the body XML. When ``None``, defaults to
                ``assets/ant_body.xml`` next to this module.
        """
        if xml_path is None:
            # Requires the module-level ``import os``; without it the default
            # path could not be built and construction raised NameError.
            xml_path = os.path.join(
                os.path.dirname(__file__), "assets", "ant_body.xml"
            )
        super(Ant, self).__init__(agent_id, xml_path)

    def set_goal(self, goal):
        """Store ``goal`` and pick the travel direction from the current x.

        ``move_left`` becomes True when the first ``qpos`` entry is strictly
        positive, and False otherwise.
        """
        self.GOAL = goal
        self.move_left = bool(self.get_qpos()[0] > 0)

    def before_step(self):
        """Record the torso x position so ``after_step`` can measure progress."""
        self._xposbefore = self.get_body_com("torso")[0]

    def after_step(self, action):
        """Compute the movement reward for the step that was just simulated.

        Args:
            action: The action applied this step; its squared sum is
                penalised as a control cost.

        Returns:
            A ``(reward, done, reward_info)`` tuple. ``done`` is True when
            the agent is no longer considered standing. ``reward_info`` maps
            each reward component name to its value.
        """
        xposafter = self.get_body_com("torso")[0]
        # Velocity along x over one env step; the sign is flipped for agents
        # that are supposed to travel in the negative x direction.
        forward_reward = (xposafter - self._xposbefore) / self.env.dt
        if self.move_left:
            forward_reward *= -1

        ctrl_cost = .5 * np.square(action).sum()

        # External contact forces are clipped to [-1, 1] before being
        # penalised.
        cfrc_ext = self.get_cfrc_ext()
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(cfrc_ext, -1, 1))
        )

        qpos = self.get_qpos()
        # Third qpos entry is presumably the torso height -- TODO confirm.
        agent_standing = qpos[2] >= 0.28
        survive = 1.0 if agent_standing else -1.

        reward = forward_reward - ctrl_cost - contact_cost + survive

        # NOTE: the two cost entries are recorded as positive magnitudes,
        # i.e. they are not negated here even though they are subtracted
        # from the total reward above.
        reward_info = dict()
        reward_info['reward_forward'] = forward_reward
        reward_info['reward_ctrl'] = ctrl_cost
        reward_info['reward_contact'] = contact_cost
        reward_info['reward_survive'] = survive
        reward_info['reward_move'] = reward

        done = not agent_standing
        return reward, done, reward_info

    def _get_obs(self):
        """Return the agent's observation vector.

        The result is the flat concatenation of, in order: own ``qpos``, own
        ``qvel``, own external contact forces clipped to [-1, 1], and the
        value returned by ``get_other_qpos()``.
        """
        my_pos = self.get_qpos()
        other_pos = self.get_other_qpos()
        my_vel = self.get_qvel()
        cfrc_ext = np.clip(self.get_cfrc_ext(), -1, 1)

        obs = np.concatenate(
            [my_pos.flat, my_vel.flat, cfrc_ext.flat, other_pos.flat]
        )
        return obs

    def set_observation_space(self):
        """Define an unbounded ``Box`` sized to the current observation."""
        obs = self._get_obs()
        self.obs_dim = obs.size
        high = np.inf * np.ones(self.obs_dim)
        low = -high
        self.observation_space = Box(low, high)

    def reached_goal(self):
        """Return True once the torso x position has passed ``GOAL``.

        A positive goal is reached when x is strictly greater than it; a
        negative goal when x is strictly smaller. A goal of exactly zero is
        never reported as reached.
        """
        xpos = self.get_body_com('torso')[0]
        if self.GOAL > 0 and xpos > self.GOAL:
            return True
        elif self.GOAL < 0 and xpos < self.GOAL:
            return True
        return False

    def reset_agent(self):
        """Re-orient the goal and travel direction after a reset.

        If the agent's x position and the goal have the same sign, the goal
        is negated (via ``set_goal``). ``move_left`` is then set to True when
        x is strictly positive and False otherwise.
        """
        xpos = self.get_qpos()[0]
        if xpos * self.GOAL > 0:
            self.set_goal(-self.GOAL)
        self.move_left = bool(xpos > 0)