def run()

in src/evaluate-unity.py [0:0]


def run(args, parser):

    if not args.config:
        # Load configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

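    # Start a local Ray runtime; webui_host binds Ray's web UI to localhost (argument from the older Ray API used here).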
    ray.init(webui_host="127.0.0.1")

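    # Register the Unity environment with RLlib under the fixed name "unity_env";
    # the lambda ignores the per-worker config and always wraps the env given by args.env.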
    agent_env_config = {"env_name": args.env}

    register_env("unity_env", lambda config: UnityEnvWrapper(agent_env_config))

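    # get_agent_class lives in different modules across Ray releases, so pick the
    # import path by version (note this is a plain string comparison).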
    if ray.__version__ >= "0.6.5":
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class

    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 0
    config["num_gpus"] = 0
    agent = cls(env="unity_env", config=config)
    # Delete unnecessary Unity CSV logs left over in the training data directory
    env_name = args.env.split('.')[0]
    files = glob("/opt/ml/input/data/train/{}_Data/Logs/*.csv".format(env_name), recursive=True)
    for file in files:
        os.remove(file)

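    # Restore the trained policy weights from the checkpoint.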
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    env_config = {"env_name": args.env}

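    # Build the evaluation environment; on older Ray versions the env must be
    # wrapped manually (DQN frame wrapping or the default preprocessor).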
    if ray.__version__ >= "0.6.5":
        env = UnityEnvWrapper(env_config)
    else:
        from ray.rllib.agents.dqn.common.wrappers import wrap_dqn
        if args.algorithm == "DQN":
            env = UnityEnvWrapper(env_config)
            env = wrap_dqn(env, args.config.get("model", {}))
        else:
            env = ModelCatalog.get_preprocessor_as_wrapper(UnityEnvWrapper(env_config))

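    # Record a video of every evaluation episode to OUTPUT_DIR via gym's Monitor wrapper.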
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True, video_callable=lambda episode_id: True)
    all_rewards = []
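    # Roll out the requested number of episodes with the restored policy, tracking each episode's return.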
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))

    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))