agent_zoo/RoboschoolPong_v0_2017may1.py [4:50]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def relu(x):
    return np.maximum(x, 0)

class SmallReactivePolicy:
    "Simple multi-layer perceptron policy, no internal state"
    def __init__(self, ob_space, ac_space):
        # Weights are hard-coded below as module-level np.array constants;
        # check that they match the env's observation/action dimensions.
        assert weights_dense1_w.shape == (ob_space.shape[0], 64)
        assert weights_dense2_w.shape == (64, 32)
        assert weights_final_w.shape  == (32, ac_space.shape[0])

    def act(self, ob):
        # Forward pass: two ReLU hidden layers (64 and 32 units),
        # then a linear output layer producing the action vector.
        x = ob
        x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
        x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
        x = np.dot(x, weights_final_w) + weights_final_b
        return x

def demo_run():
    env = gym.make("RoboschoolPong-v1")
    if len(sys.argv) == 3:
        # Multiplayer mode: argv[1] is the shared game session id,
        # argv[2] the player slot this agent occupies.
        env.unwrapped.multiplayer(env, sys.argv[1], player_n=int(sys.argv[2]))

    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while True:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while True:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open is False:   # viewer window was closed
                return
            if not done:
                continue
            # Episode finished: print the score once, then keep showing the
            # final frame for a short delay before resetting.
            if restart_delay == 0:
                print("score=%0.2f in %i frames" % (score, frame))
                if still_open is not True:   # not True in multiplayer or non-Roboschool environment
                    break
                restart_delay = 60*2  # 2 sec at 60 fps
            restart_delay -= 1
            if restart_delay == 0:
                break

weights_dense1_w = np.array([
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
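
Usage sketch (not part of the original file): the listing stops at the first
hard-coded weight array; the full script defines weights_dense1_b,
weights_dense2_w/b and weights_final_w/b the same way and ends with an
if __name__=="__main__": demo_run() guard. Appended to either file, the
snapshot can also be evaluated headlessly; the episode count and 1000-step
cap below are arbitrary choices, not values from the original script.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
import gym, roboschool  # importing roboschool registers RoboschoolPong-v1
import numpy as np

def evaluate(episodes=3, max_steps=1000):
    # Headless rollout: the same act/step loop as demo_run(), minus
    # rendering and the restart-delay bookkeeping.
    env = gym.make("RoboschoolPong-v1")
    pi = SmallReactivePolicy(env.observation_space, env.action_space)
    for ep in range(episodes):
        obs = env.reset()
        score = 0.0
        for _ in range(max_steps):
            obs, r, done, _ = env.step(pi.act(obs))
            score += r
            if done:
                break
        print("episode %i: score=%0.2f" % (ep, score))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -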



agent_zoo/RoboschoolPong_v0_2017may2.py [4:50]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def relu(x):
    return np.maximum(x, 0)

class SmallReactivePolicy:
    "Simple multi-layer perceptron policy, no internal state"
    def __init__(self, ob_space, ac_space):
        # Weights are hard-coded below as module-level np.array constants;
        # check that they match the env's observation/action dimensions.
        assert weights_dense1_w.shape == (ob_space.shape[0], 64)
        assert weights_dense2_w.shape == (64, 32)
        assert weights_final_w.shape  == (32, ac_space.shape[0])

    def act(self, ob):
        # Forward pass: two ReLU hidden layers (64 and 32 units),
        # then a linear output layer producing the action vector.
        x = ob
        x = relu(np.dot(x, weights_dense1_w) + weights_dense1_b)
        x = relu(np.dot(x, weights_dense2_w) + weights_dense2_b)
        x = np.dot(x, weights_final_w) + weights_final_b
        return x

def demo_run():
    env = gym.make("RoboschoolPong-v1")
    if len(sys.argv) == 3:
        # Multiplayer mode: argv[1] is the shared game session id,
        # argv[2] the player slot this agent occupies.
        env.unwrapped.multiplayer(env, sys.argv[1], player_n=int(sys.argv[2]))

    pi = SmallReactivePolicy(env.observation_space, env.action_space)

    while True:
        frame = 0
        score = 0
        restart_delay = 0
        obs = env.reset()

        while True:
            a = pi.act(obs)
            obs, r, done, _ = env.step(a)
            score += r
            frame += 1
            still_open = env.render("human")
            if still_open is False:   # viewer window was closed
                return
            if not done:
                continue
            # Episode finished: print the score once, then keep showing the
            # final frame for a short delay before resetting.
            if restart_delay == 0:
                print("score=%0.2f in %i frames" % (score, frame))
                if still_open is not True:   # not True in multiplayer or non-Roboschool environment
                    break
                restart_delay = 60*2  # 2 sec at 60 fps
            restart_delay -= 1
            if restart_delay == 0:
                break

weights_dense1_w = np.array([
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
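
Both snapshots expose the same multiplayer hook, so they can be pitted against
each other. The driver below follows the pong example in the Roboschool README;
PongSceneMultiplayer, SharedMemoryServer and the "pongdemo" session id come
from that example and should be treated as assumptions if your Roboschool
version differs.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
import sys, subprocess
import roboschool, roboschool.multiplayer, roboschool.gym_pong

# Shared pong scene, served under the session id "pongdemo".
game = roboschool.gym_pong.PongSceneMultiplayer()
gameserver = roboschool.multiplayer.SharedMemoryServer(game, "pongdemo", want_test_window=True)

# One agent process per player slot; demo_run() picks up argv[1] (session id)
# and argv[2] (player slot) and calls env.unwrapped.multiplayer(...).
for player_n, script in enumerate(["agent_zoo/RoboschoolPong_v0_2017may1.py",
                                   "agent_zoo/RoboschoolPong_v0_2017may2.py"]):
    subprocess.Popen([sys.executable, script, "pongdemo", str(player_n)])

gameserver.serve_forever()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -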



