def simulate()

in source/RLlibEnv/heuristics_utils.py [0:0]


def simulate(env, net, heuristics, number_of_snakes, use_random_snake):
    """Play one game in ``env`` to completion and record every turn.

    On each turn every snake gets raw action scores (random, or from
    ``get_action`` with ``net``). For snakes that ``is_snake_alive`` reports
    as alive, the scores are handed to ``heuristics.run_with_env`` which
    returns the final action plus a log string; for dead snakes the argmax
    of the scores is used and the log string is ``"Dead"``.

    Args:
        env: Environment providing ``reset()``, ``step(actions)``,
            ``render(mode="rgb_array")`` and ``get_json()``.
        net: Policy forwarded to ``get_action``; not used when
            ``use_random_snake`` is truthy.
        heuristics: Object providing ``run_with_env(...)``.
        number_of_snakes: Number of snakes to act for each turn.
        use_random_snake: If truthy, draw uniform random action scores
            instead of querying ``net``.

    Returns:
        Tuple ``(infos_array, rgb_arrays, actions_array,
        heuristics_log_array, json_array)``. Each list holds one entry for
        the initial state followed by one entry per simulated turn.
    """
    state, _, _, infos = env.reset()

    # Copy the first frame like the per-step frames below, in case the
    # environment reuses its render buffer between calls.
    rgb_arrays = [env.render(mode="rgb_array").copy()]
    infos_array = [infos]
    # Placeholder action row for the initial state.
    # NOTE(review): 4 is presumably a "no move" marker - confirm.
    actions_array = [[4] * number_of_snakes]
    json_array = [env.get_json()]
    heuristics_log_array = [{k: "" for k in range(number_of_snakes)}]

    # Per-agent memory of the last observation, reward and action.
    previous_move = {
        "agent_{}".format(i): {"state": None, "reward": None, "action": None}
        for i in range(number_of_snakes)
    }

    # Number of matching cells (see the termination check below) at which
    # the game is considered over: none for a solo game, one otherwise.
    snakes_to_win = 0 if number_of_snakes == 1 else 1

    while True:
        # NOTE(review): this mutates the same dict object that was already
        # appended to infos_array, so the logged entry changes as well.
        infos["current_turn"] += 1

        heuristics_log = {}
        actions = []
        observations = []
        for i in range(number_of_snakes):
            previous = previous_move["agent_{}".format(i)]

            state_i, obs = build_state_for_snake(state, i, previous["state"])
            # Keep each snake's own observation; it becomes that snake's
            # "previous state" on the next turn.
            observations.append(obs)

            if use_random_snake:
                action = np.random.uniform(size=(1, 4))
            else:
                action = get_action(net, state_i, previous["action"],
                                    previous["reward"])

            if is_snake_alive(env, i):
                action, heuristics_log_string = heuristics.run_with_env(
                    state_i, snake_id=i,
                    turn_count=infos["current_turn"] + 1,
                    health=infos["snake_health"],
                    env=env,
                    action=action)
            else:
                action = np.argmax(action[0])
                heuristics_log_string = "Dead"

            heuristics_log[i] = heuristics_log_string
            actions.append(action)

        next_state, rewards, _, infos = env.step(actions)

        for i in range(number_of_snakes):
            previous_move["agent_{}".format(i)] = {"state": observations[i],
                                                   "reward": rewards[i],
                                                   "action": actions[i]}

        rgb_arrays.append(env.render(mode="rgb_array").copy())
        infos_array.append(infos)
        actions_array.append(actions)
        heuristics_log_array.append(heuristics_log)
        json_array.append(env.get_json())

        # Count the cells whose sum over axis 2 equals 5.
        # NOTE(review): presumably these cells are snake heads - confirm
        # against the environment's state encoding.
        remaining = np.count_nonzero(np.sum(next_state, axis=2) == 5)

        state = next_state
        if remaining == snakes_to_win:
            print("Completed")
            break

    return infos_array, rgb_arrays, actions_array, heuristics_log_array, json_array