scheme_adapters/simple_rl_adapter/simple_rl_adapter.py
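# Imports needed by main() below (a minimal sketch; the full module is assumed
# to already import these, and to define the System class and the
# state_to_index() helper used further down).
import argparse
import os
import random

import numpy as np
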
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--path", required=True, help="DAMOOS path")
    parser.add_argument("-lb", "--lazybox_path", required=True, help="Lazybox path")
    parser.add_argument("-w", "--workload", required=True, help="Workload name.")
    parser.add_argument("-n", "--num_iterations", required=False, help="Number of iterations.")
    parser.add_argument("-lr", "--learning_rate", required=False, help="Learning rate.")
    parser.add_argument("-e", "--epsilon", required=False, help="Epsilon.")
    parser.add_argument("-d", "--discount", required=False, help="Discount rate.")
    parser.add_argument("-dm", "--damos_path", required=True, help="DAMOS path")
    args = vars(parser.parse_args())
    path = args["path"]
    lazybox_path = args["lazybox_path"]
    workload = args["workload"]
    damos_path = args["damos_path"]
    if args["num_iterations"]:
        numiters = int(args["num_iterations"])
    else:
        numiters = 50
    if args["learning_rate"]:
        alpha = float(args["learning_rate"])
    else:
        alpha = 0.2
    if args["epsilon"]:
        epsilon = float(args["epsilon"])
    else:
        epsilon = 0.2
    if args["discount"]:
        discount = float(args["discount"])
    else:
        discount = 0.9
    '''
    The 21 states correspond to an RSS overhead of 0%:-4%, -5%:-9%, -10%:-14%, ..., -95%:-99%, and >0%.
    A 100% reduction in RSS is not possible, as that would mean a new RSS of 0!
    '''
    num_states = 21
    '''
    The 30 actions correspond to {min_age: 3s, 5s, 7s, 9s, 11s, 13s} x {min_size: 4KB, 8KB, 12KB, 16KB, 20KB}.
    '''
    num_actions = 30
    # Initialize the Q-table.
    Qvalue = np.random.rand(num_states, num_actions)
    # Initialize the system environment.
    system = System(path, lazybox_path, damos_path, workload)
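    # Training loop: for each episode, reset the environment, pick actions
    # epsilon-greedily, and update the Q-table with the one-step Q-learning rule.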
    for i in range(numiters):
        state = system.reset()
        rew = 0
        done = False
        while not done:
            randomnum = random.uniform(0, 1)
            action = 0
            # Epsilon-greedy: exploit the best known action with probability
            # 1 - epsilon, otherwise explore a random action.
            if randomnum >= epsilon:
                lst = Qvalue[state_to_index(state)]
                action = lst.argmax(axis=0)
            else:
                action = random.randint(0, num_actions - 1)
            reward, nextstate, done = system.step(action)
            if done:
                print(str(i) + ". Reward", reward)
            rew = rew + reward
            # Q-learning update for the action that was actually taken.
            nxtlist = Qvalue[state_to_index(nextstate)]
            currval = Qvalue[state_to_index(state)][action]
            Qvalue[state_to_index(state)][action] = currval + alpha * (reward + discount * max(nxtlist) - currval)
            state = nextstate
    # Save the Q-values in a file.
    if not os.path.exists(path + "/results/simple_rl"):
        os.makedirs(path + "/results/simple_rl")
    np.savetxt(path + "/results/simple_rl" + "/qvalue-" + workload + ".txt", Qvalue, fmt='%f')
    # Evaluate: run the learned greedy policy for 5 episodes and report the average reward.
    avg_rew = 0
    for i in range(5):
        state = system.reset()
        done = False
        while not done:
            # Always act greedily with respect to the learned Q-table.
            action = Qvalue[state_to_index(state)].argmax(axis=0)
            reward, nextstate, done = system.step(action)
            if done:
                avg_rew = avg_rew + reward
                print("Final Evaluation " + str(i) + " reward = ", reward)
            state = nextstate
    print("Average Reward", avg_rew / 5)