in spinup/algos/tf1/trpo/core.py [0:0]
def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation, action_space):
    """Build the graph for a diagonal-Gaussian policy whose mean is an MLP of ``x``.

    The mean is produced by ``mlp`` applied to ``x``; the log standard
    deviation is a standalone variable named ``'log_std'`` (one entry per
    action dimension, initialised to -0.5), so it does not depend on ``x``.

    Args:
        x: Input tensor passed to ``mlp`` (presumably the observation
            placeholder -- confirm against the caller).
        a: Action tensor; the size of its last static dimension sets the
            action dimensionality, and it is the point at which ``logp`` is
            evaluated.
        hidden_sizes: Iterable of hidden-layer widths; the action dimension
            is appended as the final layer width.
        activation: Forwarded to ``mlp`` as its third positional argument.
        output_activation: Forwarded to ``mlp`` as its fourth positional
            argument.
        action_space: Not referenced in this function body; retained so the
            signature stays unchanged for callers.

    Returns:
        A 6-tuple ``(pi, logp, logp_pi, info, info_phs, d_kl)``:
            pi: ``mu + noise * std`` with ``noise`` drawn by
                ``tf.random_normal`` in the shape of ``mu``.
            logp: ``gaussian_likelihood(a, mu, log_std)``.
            logp_pi: ``gaussian_likelihood(pi, mu, log_std)``.
            info: dict with keys ``'mu'`` and ``'log_std'`` holding the
                current mean tensor and log-std variable.
            info_phs: dict with the same keys holding the placeholders for
                the previous mean and log-std.
            d_kl: ``diagonal_gaussian_kl`` of the current ``(mu, log_std)``
                against the placeholder pair (argument order: current first).
    """
    # NOTE(review): statements are kept in the original op-creation order;
    # in TF1, op naming and graph-level seeding can depend on that order.
    n_actions = a.shape.as_list()[-1]
    layer_widths = [*hidden_sizes, n_actions]
    mean = mlp(x, layer_widths, activation, output_activation)

    # A single log-std vector, not a function of the input tensor.
    initial_log_std = -0.5 * np.ones(n_actions, dtype=np.float32)
    log_std = tf.get_variable(name='log_std', initializer=initial_log_std)
    std = tf.exp(log_std)

    # Sample by shifting and scaling standard-normal noise.
    noise = tf.random_normal(tf.shape(mean))
    pi = mean + noise * std

    logp = gaussian_likelihood(a, mean, log_std)
    logp_pi = gaussian_likelihood(pi, mean, log_std)

    # Placeholders through which a previous policy's parameters are fed.
    prev_mean_ph, prev_log_std_ph = placeholders(n_actions, n_actions)
    d_kl = diagonal_gaussian_kl(mean, log_std, prev_mean_ph, prev_log_std_ph)

    info = dict(mu=mean, log_std=log_std)
    info_phs = dict(mu=prev_mean_ph, log_std=prev_log_std_ph)
    return pi, logp, logp_pi, info, info_phs, d_kl