gym-compete/gym_compete/new_envs/multi_agent_scene.py:
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class MultiAgentScene(mujoco_env.MujocoEnv, utils.EzPickle):
    '''
    A single MuJoCo scene shared by n_agents agents. The scene owns the
    joint simulation; per-agent observations and rewards are expected to
    come from a wrapper environment, so the _step/_get_obs stubs below
    exist only to satisfy MujocoEnv.__init__.
    '''

    def __init__(self, xml_path, n_agents):
        self.n_agents = n_agents
        # MujocoEnv.__init__ calls _step/_get_obs once while building the
        # observation and action spaces; this flag lets the stubs allow
        # that single call and reject any later use.
        self._mujoco_init = False
        mujoco_env.MujocoEnv.__init__(self, xml_path, 5)
        self._mujoco_init = True
        utils.EzPickle.__init__(self)

    def simulate(self, actions):
        '''Step the shared simulation with one action array per agent.'''
        a = np.concatenate(actions, axis=0)
        self.do_simulation(a, self.frame_skip)
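
    # Usage sketch (an assumption about the wrapper, not something this
    # file defines): a multi-agent env holding a MultiAgentScene would
    # split its joint action evenly and call, e.g.,
    #
    #     actions = np.split(joint_action, scene.n_agents)
    #     scene.simulate(actions)
    #
    # so each agent drives one contiguous slice of the actuator vector.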

    def _step(self, actions):
        '''
        Exists only to satisfy MujocoEnv.__init__; must not be called
        after construction.
        '''
        assert not self._mujoco_init, '_step should not be called on Scene'
        # Return an empty info dict rather than None to match gym's
        # (obs, reward, done, info) contract.
        return self._get_obs(), 0, False, {}

    def _get_obs(self):
        '''
        Exists only to satisfy MujocoEnv.__init__; must not be called
        after construction.
        '''
        assert not self._mujoco_init, '_get_obs should not be called on Scene'
        obs = np.concatenate([
            self.model.data.qpos.flat,
            self.model.data.qvel.flat
        ])
        return obs

    def reset_model(self):
        # Jitter the initial state so episodes do not all start from an
        # identical configuration.
        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        # Per-agent observations come from the wrapper, not the scene.
        return None

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = self.model.stat.extent * 0.55
        self.viewer.cam.lookat[2] += .8
        self.viewer.cam.elevation = -10
        # Pick one of three fixed viewing angles at random; note this uses
        # the global NumPy RNG, not the env's seeded self.np_random.
        rand = np.random.random()
        if rand < 0.33:
            self.viewer.cam.azimuth = 0
        elif rand < 0.66:
            self.viewer.cam.azimuth = 90
        else:
            self.viewer.cam.azimuth = 180
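

if __name__ == '__main__':
    # Minimal smoke test: a hedged sketch, not part of the original module.
    # It assumes the old-style gym + mujoco-py stack matching the imports
    # above, and takes the scene XML path from the command line rather than
    # guessing a repository filename.
    import sys

    scene = MultiAgentScene(sys.argv[1], n_agents=2)
    scene.reset_model()
    # Zero torques, split evenly across the scene's actuators (assumes the
    # agents are symmetric, so the split is even).
    per_agent = scene.model.nu // scene.n_agents
    actions = [np.zeros(per_agent) for _ in range(scene.n_agents)]
    for _ in range(10):
        scene.simulate(actions)
    print('simulated 10 steps for', scene.n_agents, 'agents')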