in spinup/exercises/tf1/problem_set_2/exercise2_2.py [0:0]
def bugged_mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu,
output_activation=tf.tanh, action_space=None):
act_dim = a.shape.as_list()[-1]
act_limit = action_space.high[0]
with tf.variable_scope('pi'):
pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], activation, output_activation)
with tf.variable_scope('q'):
q = mlp(tf.concat([x,a], axis=-1), list(hidden_sizes)+[1], activation, None)
with tf.variable_scope('q', reuse=True):
q_pi = mlp(tf.concat([x,pi], axis=-1), list(hidden_sizes)+[1], activation, None)
return pi, q, q_pi