in source/RLlibEnv/heuristics_utils.py [0:0]
def simulate(env, net, heuristics, number_of_snakes, use_random_snake):
    """Play one game in ``env`` until it ends, recording every turn.

    Each turn, every snake first gets a raw action: uniform random scores
    when ``use_random_snake`` is true, otherwise the result of
    ``get_action(net, ...)``. For snakes that ``is_snake_alive`` reports as
    alive, the raw action is passed through ``heuristics.run_with_env``;
    for the others the argmax of the raw action is used and the heuristics
    log entry is ``"Dead"``.

    Args:
        env: Environment providing ``reset``, ``step``, ``render`` and
            ``get_json``.
        net: Policy handed to ``get_action``; not used when
            ``use_random_snake`` is true.
        heuristics: Object providing ``run_with_env``.
        number_of_snakes: Number of snakes taking part in the game.
        use_random_snake: If true, draw random action scores instead of
            querying ``net``.

    Returns:
        Tuple ``(infos_array, rgb_arrays, actions_array,
        heuristics_log_array, json_array)``. Each list holds one entry for
        the initial state followed by one entry per environment step.
    """
    state, _, _, infos = env.reset()

    # Seed every log with an entry describing the initial board.
    rgb_arrays = [env.render(mode="rgb_array")]
    infos_array = [infos]
    actions_array = [[4 for _ in range(number_of_snakes)]]
    json_array = [env.get_json()]
    heuristics_log_array = [{k: "" for k in range(number_of_snakes)}]

    agent_ids = ["agent_{}".format(i) for i in range(number_of_snakes)]
    previous_move = {
        agent_id: {"state": None, "reward": None, "action": None}
        for agent_id in agent_ids
    }

    # A single-snake game runs until no matching cell is left on the board;
    # otherwise it runs until exactly one is left.
    snakes_to_win = 0 if number_of_snakes == 1 else 1

    while True:
        # NOTE(review): this mutates the dict that was just appended to
        # infos_array, so the recorded entry sees the increment as well.
        infos["current_turn"] += 1
        heuristics_log = {}
        actions = []
        observations = []

        for i, agent_id in enumerate(agent_ids):
            last_move = previous_move[agent_id]
            state_i, obs = build_state_for_snake(state, i, last_move["state"])
            # Keep each snake's own observation: it is fed back into
            # build_state_for_snake for that same snake on the next turn.
            observations.append(obs)

            if use_random_snake:
                action = np.random.uniform(size=(1, 4))
            else:
                action = get_action(net, state_i, last_move["action"],
                                    last_move["reward"])

            if is_snake_alive(env, i):
                action, heuristics_log_string = heuristics.run_with_env(
                    state_i, snake_id=i,
                    turn_count=infos["current_turn"] + 1,
                    health=infos["snake_health"],
                    env=env,
                    action=action)
            else:
                # Assumes the raw action is a (1, n) array of scores, as in
                # the random branch above -- TODO confirm for get_action.
                action = np.argmax(action[0])
                heuristics_log_string = "Dead"

            heuristics_log[i] = heuristics_log_string
            actions.append(action)

        # The done flags returned by step() are not consulted; termination
        # is decided from the board contents below.
        next_state, rewards, _, infos = env.step(actions)

        for i, agent_id in enumerate(agent_ids):
            previous_move[agent_id] = {"state": observations[i],
                                       "reward": rewards[i],
                                       "action": actions[i]}

        rgb_arrays.append(env.render(mode="rgb_array").copy())
        infos_array.append(infos)
        actions_array.append(actions)
        heuristics_log_array.append(heuristics_log)
        json_array.append(env.get_json())

        # Count the cells whose values summed over axis 2 equal 5
        # (presumably snake heads -- verify against the env's encoding).
        remaining = len(np.where(np.sum(next_state, axis=2) == 5)[0])
        state = next_state
        if remaining == snakes_to_win:
            print("Completed")
            break

    return infos_array, rgb_arrays, actions_array, heuristics_log_array, json_array