in src/evaluate-unity.py [0:0]
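# Module-level imports assumed by run() below; UnityEnvWrapper and OUTPUT_DIR
# are expected to be defined elsewhere in this file.
import json
import os
from distutils.version import LooseVersion
from glob import glob

import numpy as np
import ray
from gym import wrappers
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
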
def run(args, parser):
    if not args.config:
        # Load the configuration from file
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during Ray training by default
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)
    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")
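    # Start a local Ray runtime (binding the pre-1.0 web UI to localhost) and
    # register the Unity env factory; note the lambda ignores the config Ray
    # passes in and always builds the env from agent_env_config.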
    ray.init(webui_host="127.0.0.1")
    agent_env_config = {"env_name": args.env}
    register_env("unity_env", lambda config: UnityEnvWrapper(agent_env_config))
    # get_agent_class moved to ray.rllib.agents.registry in Ray 0.6.5; compare
    # parsed versions rather than raw strings so e.g. "0.10.0" sorts correctly
    if LooseVersion(ray.__version__) >= LooseVersion("0.6.5"):
        from ray.rllib.agents.registry import get_agent_class
    else:
        from ray.rllib.agents.agent import get_agent_class
    cls = get_agent_class(args.algorithm)
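    # Evaluate in-process on CPU: no rollout workers, no GPUs, and no RLlib
    # monitor (video recording is handled by the gym Monitor wrapper below)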
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 0
    config["num_gpus"] = 0
    agent = cls(env="unity_env", config=config)
    # Delete unnecessary logs left in the SageMaker training input channel
    env_name = args.env.split('.')[0]
    files = glob("/opt/ml/input/data/train/{}_Data/Logs/*.csv".format(env_name), recursive=True)
    for file in files:
        os.remove(file)
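    # Load the trained weights and set up the evaluation environment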
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)
    env_config = {"env_name": args.env}
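    # From Ray 0.6.5 on, compute_action applies observation preprocessing
    # itself, so the raw env can be stepped directly; older versions need
    # the env wrapped explicitly first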
    if LooseVersion(ray.__version__) >= LooseVersion("0.6.5"):
        env = UnityEnvWrapper(env_config)
    else:
        from ray.rllib.agents.dqn.common.wrappers import wrap_dqn
        if args.algorithm == "DQN":
            env = UnityEnvWrapper(env_config)
            env = wrap_dqn(env, args.config.get("model", {}))
        else:
            env = ModelCatalog.get_preprocessor_as_wrapper(UnityEnvWrapper(env_config))
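    # Record a video of every evaluation episode to OUTPUT_DIR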
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True, video_callable=lambda episode_id: True)
    all_rewards = []
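    # Roll the policy out for the requested number of evaluation episodes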
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
print("Mean Reward:", np.mean(all_rewards))
print("Max Reward:", np.max(all_rewards))
print("Min Reward:", np.min(all_rewards))