in ma_policy/normalizers.py [0:0]
def __init__(self, beta, scope="ema", reuse=None, epsilon=1e-6, per_element_update=False, shape=(), version=1):
    """Track a debiased exponential moving average of a quantity's mean and std.

    Creates three non-trainable float32 variables under `scope` — "mean"
    (running E[x]), "sq" (running E[x^2]) and "debiasing_term" — all
    initialized to zero. Because the EMA starts from 0, the stored moments
    are biased toward zero; dividing by the accumulated debiasing term
    removes that bias (Adam-style bias correction).

    Args:
        beta: EMA decay; `1 - beta` is the weight given to each new observation.
        scope: tf variable scope name for the created variables.
        reuse: passed through to `tf.variable_scope`.
        epsilon: floor for the debiasing denominator, guarding the divide
            before any update has been applied.
        per_element_update: stored for use by update logic elsewhere in the
            class — presumably switches between scalar and element-wise
            update weighting; confirm against the update method.
        shape: shape of the tracked statistics (`()` for scalar stats).
        version: stored behavioral version flag; semantics live in the
            update logic, not here.
    """
    self._version = version
    self._per_element_update = per_element_update
    with tf.variable_scope(scope, reuse=reuse):
        def _stat_variable(name):
            # Statistics are updated by explicit assign ops, never by an
            # optimizer, hence trainable=False and zero init.
            return tf.get_variable(
                name=name,
                dtype=tf.float32,
                shape=shape,
                initializer=tf.constant_initializer(0.0),
                trainable=False)

        # Biased running estimates of E[x] and E[x^2].
        self._biased_mean = _stat_variable("mean")
        self._biased_sq = _stat_variable("sq")
        # How strongly each new observation is folded into the EMA.
        self._one_minus_beta = 1.0 - beta
        # Total EMA weight placed on the zero initialization; dividing the
        # biased moments by this term debiases them.
        self._debiasing_term = _stat_variable("debiasing_term")
    self.shape = shape
    # Debiased moments; epsilon keeps the division finite before the first
    # update, when the debiasing term is still zero.
    _denom = tf.maximum(self._debiasing_term, epsilon)
    self.mean = self._biased_mean / _denom
    self.std = _std_from_mean_and_square(mean=self.mean, square=self._biased_sq / _denom)