in gym/gym/envs/parameter_tuning/convergence.py [0:0]
    def _step(self, action):
        """
        Perform one step in the environment: decode the hyperparameter
        action, update the optimizer and regularizer, and train the
        model for one epoch.
        """
        assert self.action_space.contains(action)

        lr, decay, momentum, batch_size, l1, l2 = action
        # Map the raw action values to hyperparameter ranges: lr, decay,
        # momentum, l1 and l2 arrive as log10 exponents, batch_size as a
        # log2 exponent.
        lr = (10.0 ** lr[0]).astype('float32')
        decay = (10.0 ** decay[0]).astype('float32')
        momentum = (10.0 ** momentum[0]).astype('float32')

        batch_size = int(2 ** batch_size[0])

        l1 = (10.0 ** l1[0]).astype('float32')
        l2 = (10.0 ** l2[0]).astype('float32')
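        # For example (illustrative values only, not taken from the actual
        # action-space bounds): lr[0] = -3.0 maps to lr = 1e-3, and
        # batch_size[0] = 5.0 maps to batch_size = 32.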
"""
names = ["lr", "decay", "mom", "batch", "l1", "l2"]
values = [lr, decay, momentum, batch_size, l1, l2]
for n,v in zip(names, values):
print(n,v)
"""
X,Y,Xv,Yv = self.data
        # set parameters of training step
        self.sgd.lr.set_value(lr)
        self.sgd.decay.set_value(decay)
        self.sgd.momentum.set_value(momentum)

        self.reg.l1.set_value(l1)
        self.reg.l2.set_value(l2)
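        # The set_value calls above update shared variables in place
        # (Theano-style backend variables, which is what `.set_value`
        # implies here), so the compiled model picks up the new
        # hyperparameters without being rebuilt.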
        # train the model for one epoch
        H = self.model.fit(X, Y,
                           batch_size=batch_size,
                           nb_epoch=1,
                           shuffle=True)
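        # evaluate() is expected to return [loss, accuracy] here; this
        # assumes the model was compiled with an accuracy metric.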
        _, acc = self.model.evaluate(Xv, Yv)
        # save best validation
        if acc > self.best_val:
            self.best_val = acc

        self.previous_acc = acc

        self.epoch_idx += 1
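        # An episode ends after 20 training epochs, or earlier if the
        # training loss diverges to NaN.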
        diverged = math.isnan(H.history['loss'][-1])
        done = self.epoch_idx == 20 or diverged
        if diverged:
            # Note: the penalty might not need to be this harsh; a run
            # that reaches a good accuracy before diverging is arguably
            # not that bad.
            reward = -100.0
        else:
            reward = self.best_val
        # As the number of labels increases, the learning problem becomes
        # harder for a fixed dataset size. To keep the agent from ignoring
        # the more complex datasets, where accuracy is low, and
        # concentrating on the simple ones that yield the bulk of the
        # reward, the reward is normalized by the number of classes in
        # the dataset.
        reward = reward * self.nb_classes
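        # Illustrative example (numbers not from actual environment data):
        # with nb_classes = 10 and best_val = 0.5, the normalized reward
        # is 5.0.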
        # The quadratic term below rewards higher best validation accuracy
        # superlinearly. It is applied only to non-diverged runs, because
        # squaring the negative divergence penalty would turn it into a
        # large positive reward.
        if not diverged:
            reward = reward + reward ** 2

        return self._get_obs(), reward, done, {}