agent_zoo/demo_race1.py (41 lines of code) (raw):

"""Multiplayer race demo: one stadium server plus one process per player.

Run without arguments to start the game server.  The server creates the
shared stadium scene and then re-launches this same script once per player
slot, passing the server name and the player index on the command line.

Run as ``<script> <game_server_guid> <player_n>`` (normally done by the
server, not by hand) to become a player: a random pre-trained policy is
picked, attached to the running server, and stepped forever.
"""

import os
import sys
import subprocess

import numpy as np
import gym
import roboschool

if len(sys.argv) == 1:
    # ---- Server mode -------------------------------------------------------
    import roboschool.multiplayer

    stadium = roboschool.scene_stadium.MultiplayerStadiumScene(
        gravity=9.8, timestep=0.0165 / 4, frame_skip=4
    )
    gameserver = roboschool.multiplayer.SharedMemoryServer(
        stadium, "race", want_test_window=True
    )

    # We start subprocesses between constructor and serve_forever(), because
    # the constructor creates the necessary pipes to connect to.
    players = []
    try:
        # Append one handle at a time (rather than building the list in one
        # expression) so that already-started players are still cleaned up
        # if a later Popen call fails.
        for n in range(stadium.players_count):
            players.append(
                subprocess.Popen([sys.executable, sys.argv[0], "race", "%i" % n])
            )
        gameserver.serve_forever()
    finally:
        # Player processes loop forever on their own; stop and reap them when
        # the server stops (normally or via an exception) so they do not
        # linger as orphans.
        for player in players:
            if player.poll() is None:
                player.terminate()
        for player in players:
            player.wait()

else:
    # ---- Player mode -------------------------------------------------------
    # Validate the command line before the expensive TensorFlow import, so a
    # wrong invocation fails immediately with a readable message.
    if len(sys.argv) < 3:
        sys.exit("usage: %s [<game_server_guid> <player_n>]" % sys.argv[0])

    # Must be set before TensorFlow is imported to take effect.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf

    # Single-threaded session with the GPU device count forced to zero.
    config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        device_count={"GPU": 0},
    )
    # If this gives you an error, try CUDA_VISIBLE_DEVICES=  (nothing visible)
    sess = tf.InteractiveSession(config=config)

    from RoboschoolWalker2d_v1_2017jul import ZooPolicyTensorflow as PolWalker
    from RoboschoolHopper_v1_2017jul import ZooPolicyTensorflow as PolHopper
    from RoboschoolHalfCheetah_v1_2017jul import ZooPolicyTensorflow as PolHalfCheetah
    from RoboschoolHumanoid_v1_2017jul import ZooPolicyTensorflow as PolHumanoid1
    # Flagrun and Harder are compatible with the normal Humanoid in
    # observations and actions, so the Flagrun policy is paired with the plain
    # Humanoid environment below.
    from RoboschoolHumanoidFlagrun_v1_2017jul import ZooPolicyTensorflow as PolHumanoid2

    # (gym environment id, policy class) pairs a player may be assigned.
    possible_participants = [
        ("RoboschoolWalker2d-v1", PolWalker),
        ("RoboschoolHopper-v1", PolHopper),
        ("RoboschoolHalfCheetah-v1", PolHalfCheetah),
        ("RoboschoolHumanoid-v1", PolHumanoid1),
        ("RoboschoolHumanoid-v1", PolHumanoid2),
    ]
    env_id, PolicyClass = possible_participants[
        np.random.randint(len(possible_participants))
    ]

    env = gym.make(env_id)
    # Attach this environment to the running server as player number argv[2].
    env.unwrapped.multiplayer(
        env, game_server_guid=sys.argv[1], player_n=int(sys.argv[2])
    )

    pi = PolicyClass("mymodel", env.observation_space, env.action_space)

    # Play episodes back to back until the process is stopped from outside.
    while True:
        obs = env.reset()
        done = False
        while not done:
            a = pi.act(obs, None)
            obs, rew, done, info = env.step(a)