def main()
in scheme_adapters/simple_rl_adapter/simple_rl_adapter.py
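
This is the driver of DAMOOS's simple RL scheme adapter: a tabular,
epsilon-greedy Q-learning loop that searches over DAMOS scheme parameters
(min_age x min_size) to reduce a workload's rss. It depends on the System
environment and the state_to_index() helper defined elsewhere in the same
file.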


import argparse
import os
import random

import numpy as np

# `System` and `state_to_index` are assumed to be defined elsewhere in
# this module; this excerpt shows main() only.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path", required=True, help="DAMOOS path")
    parser.add_argument("-lb", "--lazybox_path", required=True, help="Lazybox path")
    parser.add_argument("-w", "--workload", required=True, help="Workload name.")
    parser.add_argument("-n", "--num_iterations", required=False, help="Number of Iterations.")
    parser.add_argument("-lr", "--learning_rate", required=False, help="Learning Rate.")
    parser.add_argument("-e", "--epsilon", required=False, help="Epsilon.")
    parser.add_argument("-d", "--discount", required=False, help="Discount rate")
    parser.add_argument("-dm", "--damos_path", required=True, help="DAMOS Path")
    args = vars(parser.parse_args())

    path = args["path"]
    lazybox_path = args["lazybox_path"]
    workload = args["workload"]
    damos_path = args["damos_path"]
    
    if args["num_iterations"]:
        numiters = int(args["num_iterations"])
    else:
        numiters = 50

    if args["learning_rate"]:
        alpha = float(args["learning_rate"])
    else:
        alpha = 0.2

    if args["epsilon"]:
        epsilon = float(args["epsilon"])
    else:
        epsilon = 0.2

    if args["discount"]:
        discount = float(args["discount"])
    else:
        discount = 0.9

    '''
    The 21 states correspond to an rss overhead of 0%:-4%, -5%:-9%,
    -10%:-14%, ..., -95%:-99%, and >0%. A 100% reduction in rss is not
    possible, as that would mean a new rss of 0! (See the state_to_index
    sketch after main() below.)
    '''
    num_states = 21

    '''
    The 30 actions correspond to
    {min_age: 3s, 5s, 7s, 9s, 11s, 13s} x {min_size: 4KB, 8KB, 12KB, 16KB, 20KB}.
    '''
    num_actions = 30
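    # The action space is a 6 x 5 grid. One plausible flattening (the
    # actual decoding happens inside the System environment) would be:
    #   min_age  = 3 + 2 * (action // 5)    # seconds
    #   min_size = 4 * (action % 5 + 1)     # KB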

    # Initialize the Q-table with random values in [0, 1).
    Qvalue = np.random.rand(num_states, num_actions)

    # Initialize the system environment.
    system = System(path, lazybox_path, damos_path, workload)
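    # System exposes a gym-like interface: reset() returns an initial
    # state and step(action) returns (reward, next_state, done).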

    for i in range(numiters):
        state = system.reset()
        rew = 0
        done = False
        while not done:
            # Epsilon-greedy action selection: exploit the best known
            # action with probability 1 - epsilon, otherwise explore.
            if random.uniform(0, 1) >= epsilon:
                action = Qvalue[state_to_index(state)].argmax(axis=0)
            else:
                action = random.randint(0, num_actions - 1)

            reward, nextstate, done = system.step(action)
            if done:
                print(str(i) + ". Reward", reward)
            rew = rew + reward
            # Q-learning update:
            # Q(s, a) <- Q(s, a) + alpha * (reward + discount * max(Q(s', .)) - Q(s, a))
            nxtlist = Qvalue[state_to_index(nextstate)]
            currval = Qvalue[state_to_index(state)][action]
            Qvalue[state_to_index(state)][action] = currval + alpha * (reward + discount * max(nxtlist) - currval)
            state = nextstate

    # Save the Q-values to a file.
    if not os.path.exists(path + "/results/simple_rl"):
        os.makedirs(path + "/results/simple_rl")
    np.savetxt(path + "/results/simple_rl/qvalue-" + workload + ".txt", Qvalue, fmt='%f')
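    # The saved table can be reloaded later with, e.g.:
    #   Qvalue = np.loadtxt(path + "/results/simple_rl/qvalue-" + workload + ".txt")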

    # Evaluate the learned policy greedily over 5 episodes.
    avg_rew = 0
    for i in range(5):
        state = system.reset()
        done = False
        while not done:
            # Always exploit: pick the action with the highest Q-value.
            action = Qvalue[state_to_index(state)].argmax(axis=0)
            reward, nextstate, done = system.step(action)
            if done:
                avg_rew = avg_rew + reward
                print("Final Evaluation " + str(i) + " reward =", reward)
            state = nextstate
    print("Average Reward", avg_rew / 5)