in level_replay/level_sampler.py [0:0]
def _score_transform(self, transform, temperature, scores):
    """Convert per-level scores into (unnormalised) sampling weights.

    Args:
        transform: Name of the weighting scheme. One of ``'constant'``,
            ``'max'``, ``'eps_greedy'``, ``'rank'``, ``'power'`` or
            ``'softmax'``.
        temperature: Exponent/scale control used by ``'rank'``, ``'power'``
            and ``'softmax'``; ignored by the other schemes.
        scores: Array of scores, one entry per level. It is never modified.
            # NOTE(review): presumably a 1-D float ndarray aligned with
            # self.seeds / self.unseen_seed_weights -- confirm with caller.

    Returns:
        An array of weights with the same shape as ``scores``. No
        normalisation is applied here.

    Raises:
        ValueError: If ``transform`` is not one of the supported names.
    """
    if transform == 'constant':
        weights = np.ones_like(scores)
    elif transform == 'max':
        weights = np.zeros_like(scores)
        # Work on a real copy: ``scores[:]`` on an ndarray is only a view,
        # so masking it would overwrite the caller's scores with -inf.
        masked = np.array(scores)
        # Only argmax over seen levels.
        masked[self.unseen_seed_weights > 0] = -float('inf')
        # Break ties between (numerically) equal maxima at random.
        best = np.flatnonzero(np.isclose(masked, masked.max()))
        weights[np.random.choice(best)] = 1.
    elif transform == 'eps_greedy':
        weights = np.zeros_like(scores)
        weights[scores.argmax()] = 1. - self.eps
        # Spread the remaining eps mass uniformly over all seeds.
        weights += self.eps / len(self.seeds)
    elif transform == 'rank':
        # order[i] is the index of the i-th highest score; inverting that
        # permutation yields each level's 1-based rank (1 = highest score).
        order = np.flip(scores.argsort())
        ranks = np.empty_like(order)
        ranks[order] = np.arange(len(order)) + 1
        weights = 1 / ranks ** (1. / temperature)
    elif transform == 'power':
        # A small offset is added only when staleness_coef is not positive.
        # NOTE(review): presumably this keeps zero-score levels sampleable
        # when staleness contributes nothing -- confirm.
        eps = 0 if self.staleness_coef > 0 else 1e-3
        weights = (np.array(scores) + eps) ** (1. / temperature)
    elif transform == 'softmax':
        weights = np.exp(np.array(scores) / temperature)
    else:
        # Previously this fell through to an UnboundLocalError on return.
        raise ValueError(f'Unknown score transform: {transform!r}')
    return weights