in baselines/her/normalizer.py [0:0]
def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
    """Create the state and graph ops of a running mean/std normalizer.

    The normalizer is meant to make observations approximately follow a
    standard Normal distribution (zero mean, unit variance). This constructor
    allocates zero-filled NumPy buffers, non-trainable TensorFlow variables
    for the accumulated sum, sum of squares and sample count, variables for
    the derived mean and std, and two grouped ops: one that adds fed values
    to the accumulators and one that recomputes mean and std from them.

    Args:
        size (int): the size of the observation to be normalized
        eps (float): small constant; the variance estimate is floored at
            ``eps ** 2`` before the square root, so std never drops below it
        default_clip_range (float): normalized observations are clipped to be in
            [-default_clip_range, default_clip_range]
        sess (object): the TensorFlow session to be used; when None, the
            current default session is looked up instead
    """

    def _stat_variable(name, shape, make_initializer):
        # Every statistic is stored as a non-trainable float32 variable.
        return tf.get_variable(
            initializer=make_initializer(), shape=shape, name=name,
            trainable=False, dtype=tf.float32)

    self.size = size
    self.eps = eps
    self.default_clip_range = default_clip_range
    if sess is None:
        sess = tf.get_default_session()
    self.sess = sess

    # Zero-filled NumPy buffers mirroring the TF accumulators below.
    # NOTE(review): presumably these collect statistics locally before they
    # are fed through update_op -- confirm against the rest of the class.
    self.local_sum = np.zeros(self.size, np.float32)
    self.local_sumsq = np.zeros(self.size, np.float32)
    self.local_count = np.zeros(1, np.float32)

    # Graph-side accumulators. count starts at one (not zero), so the
    # sum / count divisions below are well-defined before any data is added.
    self.sum_tf = _stat_variable('sum', self.local_sum.shape, tf.zeros_initializer)
    self.sumsq_tf = _stat_variable('sumsq', self.local_sumsq.shape, tf.zeros_initializer)
    self.count_tf = _stat_variable('count', self.local_count.shape, tf.ones_initializer)

    # Derived statistics; they only change when recompute_op is run.
    self.mean = _stat_variable('mean', (self.size,), tf.zeros_initializer)
    self.std = _stat_variable('std', (self.size,), tf.ones_initializer)

    # Feed points for the increments applied by update_op.
    self.count_pl = tf.placeholder(name='count_pl', shape=(1,), dtype=tf.float32)
    self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size,), dtype=tf.float32)
    self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size,), dtype=tf.float32)

    # update_op: add the fed increments to the three accumulators.
    accumulate_ops = [
        self.count_tf.assign_add(self.count_pl),
        self.sum_tf.assign_add(self.sum_pl),
        self.sumsq_tf.assign_add(self.sumsq_pl),
    ]
    self.update_op = tf.group(*accumulate_ops)

    # recompute_op: mean = sum / count,
    # std = sqrt(max(eps^2, sumsq / count - (sum / count)^2)).
    assign_mean = tf.assign(self.mean, self.sum_tf / self.count_tf)
    variance_floor = tf.square(self.eps)
    second_moment = self.sumsq_tf / self.count_tf
    squared_mean = tf.square(self.sum_tf / self.count_tf)
    floored_variance = tf.maximum(variance_floor, second_moment - squared_mean)
    assign_std = tf.assign(self.std, tf.sqrt(floored_variance))
    self.recompute_op = tf.group(assign_mean, assign_std)

    # NOTE(review): the lock is not used in this method; presumably it guards
    # the local_* buffers in other methods -- confirm.
    self.lock = threading.Lock()