spinup/exercises/tf1/problem_set_1_solutions/exercise1_2_soln.py (19 lines of code) (raw):
import tensorflow as tf
import numpy as np
# Small constant added to exp(log_std) in gaussian_likelihood before it is used
# as a divisor, so the denominator is never exactly zero.
EPS = 1e-8
def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
    """Stack fully connected layers on top of ``x``.

    Every entry of ``hidden_sizes`` except the last produces a dense layer
    using ``activation``; the last entry is the width of the final dense
    layer, which uses ``output_activation`` instead.

    Args:
        x: Input passed to the first dense layer.
        hidden_sizes: Non-empty sequence of layer widths; the final entry is
            the output width.
        activation: Activation applied by all layers but the last.
        output_activation: Activation applied by the last layer (``None``
            leaves the output without an activation).

    Returns:
        The output of the final dense layer.
    """
    net = x
    for width in hidden_sizes[:-1]:
        net = tf.layers.dense(net, units=width, activation=activation)

    # The last size is handled separately because it gets its own activation.
    output_width = hidden_sizes[-1]
    return tf.layers.dense(net, units=output_width, activation=output_activation)
def gaussian_likelihood(x, mu, log_std):
    """Log-likelihood of ``x`` under a diagonal Gaussian, summed over axis 1.

    Each element contributes
    ``-0.5 * (((x - mu) / (exp(log_std) + EPS))**2 + 2*log_std + log(2*pi))``
    and the contributions are then added up along axis 1.

    Args:
        x: Points at which to evaluate the likelihood.
        mu: Mean of the Gaussian.
        log_std: Log standard deviation of the Gaussian.

    Returns:
        Tensor of summed log-likelihoods with axis 1 reduced away.

    Note:
        Assumes the three arguments broadcast against each other and that the
        result has an axis 1 to reduce over -- confirm against callers.
    """
    residual = x - mu
    # EPS keeps the divisor away from zero if exp(log_std) underflows.
    safe_std = tf.exp(log_std) + EPS
    squared_z = (residual / safe_std) ** 2
    per_element = -0.5 * (squared_z + 2 * log_std + np.log(2 * np.pi))
    return tf.reduce_sum(per_element, axis=1)
def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation, action_space):
    """Build a Gaussian policy whose mean comes from an MLP.

    The mean ``mu`` is produced by ``mlp`` with ``hidden_sizes`` followed by
    an output layer as wide as the last static dimension of ``a``. The log
    standard deviation is a separate variable named ``'log_std'`` with one
    entry per action dimension, each initialised to -0.5.

    Args:
        x: Input fed to the mean network.
        a: Actions whose log-likelihood is evaluated; its last static
            dimension determines the action dimensionality.
        hidden_sizes: Widths of the hidden layers of the mean network.
        activation: Activation for the hidden layers.
        output_activation: Activation for the mean network's output layer.
        action_space: Accepted but not used by this function.

    Returns:
        Tuple ``(pi, logp, logp_pi)``: a sample from the policy, the
        log-likelihood of ``a``, and the log-likelihood of the sample ``pi``.
    """
    act_dim = a.shape.as_list()[-1]

    # Mean network: the hidden layers plus one output unit per action dim.
    layer_sizes = [*hidden_sizes, act_dim]
    mu = mlp(x, layer_sizes, activation, output_activation)

    # Log-std is a standalone variable, not an output of the network.
    initial_log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
    log_std = tf.get_variable(name='log_std', initializer=initial_log_std)
    std = tf.exp(log_std)

    # Sample by shifting and scaling standard normal noise.
    noise = tf.random_normal(tf.shape(mu))
    pi = mu + noise * std

    logp = gaussian_likelihood(a, mu, log_std)
    logp_pi = gaussian_likelihood(pi, mu, log_std)
    return pi, logp, logp_pi