in policy.py [0:0]
def __init__(self, scope="running", reuse=False, epsilon=1e-2, shape=()):
    """Create the accumulator variables and the derived mean/std tensors.

    Three non-trainable float32 variables are created under the variable
    scope ``scope``:

    * ``sum``   -- shape ``shape``, initialised to 0.0
    * ``sumsq`` -- shape ``shape``, initialised to ``epsilon``
    * ``count`` -- scalar, initialised to ``epsilon``

    From these the constructor builds ``self.mean = sum / count`` and
    ``self.std = sqrt(max(sumsq / count - mean ** 2, 1e-2))``.

    Args:
        scope: Name passed to ``tf.variable_scope``.
        reuse: Forwarded to ``tf.variable_scope`` as its ``reuse`` argument.
        epsilon: Initial value for the ``sumsq`` and ``count`` variables.
        shape: Shape of the ``sum`` and ``sumsq`` variables; also stored
            on the instance as ``self.shape``.
    """
    def make_accumulator(name, var_shape, start_value):
        # Non-trainable float32 variable filled with a constant start value.
        return tf.get_variable(
            name=name,
            shape=var_shape,
            dtype=tf.float32,
            initializer=tf.constant_initializer(start_value),
            trainable=False)

    with tf.variable_scope(scope, reuse=reuse):
        # Variables are created in the order sum, sumsq, count.
        # NOTE(review): code that enumerates graph variables may rely on
        # this order -- confirm before reordering.
        self._sum = make_accumulator("sum", shape, 0.0)
        self._sumsq = make_accumulator("sumsq", shape, epsilon)
        self._count = make_accumulator("count", (), epsilon)
        self.shape = shape

        self.mean = tf.to_float(self._sum / self._count)
        second_moment = tf.to_float(self._sumsq / self._count)
        variance = second_moment - tf.square(self.mean)
        # The variance estimate is floored at 1e-2 before the square root.
        self.std = tf.sqrt(tf.maximum(variance, 1e-2))