in coinrun/policies.py [0:0]
import numpy as np
import tensorflow as tf

from coinrun.config import Config


def impala_cnn(images, depths=[16, 32, 32]):
    """
    Model used in the paper "IMPALA: Scalable Distributed Deep-RL with
    Importance Weighted Actor-Learner Architectures" https://arxiv.org/abs/1802.01561
    """
    use_batch_norm = Config.USE_BATCH_NORM == 1

    # One-element list so the nested dropout_layer can bump the counter when
    # naming its mask variables; the assign ops collected below let callers
    # re-randomize every dropout mask in a single session call.
    dropout_layer_num = [0]
    dropout_assign_ops = []

    def dropout_layer(out):
        if Config.DROPOUT > 0:
            out_shape = out.get_shape().as_list()
            num_features = np.prod(out_shape[1:])  # unused

            # Store the random seed in a non-trainable variable so the mask
            # stays fixed until its assign op (below) is explicitly run.
            var_name = 'mask_' + str(dropout_layer_num[0])
            batch_seed_shape = out_shape[1:]
            batch_seed = tf.get_variable(var_name, shape=batch_seed_shape, initializer=tf.random_uniform_initializer(minval=0, maxval=1), trainable=False)
            batch_seed_assign = tf.assign(batch_seed, tf.random_uniform(batch_seed_shape, minval=0, maxval=1))
            dropout_assign_ops.append(batch_seed_assign)

            # Binary keep-mask (1 where seed > DROPOUT, else 0), broadcast
            # across the batch and scaled by 1/(1 - p) to keep the expected
            # activation magnitude unchanged.
            curr_mask = tf.sign(tf.nn.relu(batch_seed[None, ...] - Config.DROPOUT))
            curr_mask = curr_mask * (1.0 / (1.0 - Config.DROPOUT))

            out = out * curr_mask

        dropout_layer_num[0] += 1
        return out
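
    # Worked through with an illustrative p = Config.DROPOUT = 0.2: a seed
    # entry of 0.5 gives relu(0.5 - 0.2) = 0.3 and sign(0.3) = 1, rescaled to
    # 1 / (1 - 0.2) = 1.25; a seed entry of 0.1 gives relu(-0.1) = 0, zeroing
    # that feature. Each feature therefore survives with probability 1 - p.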

    def conv_layer(out, depth):
        out = tf.layers.conv2d(out, depth, 3, padding='same')
        out = dropout_layer(out)

        if use_batch_norm:
            out = tf.contrib.layers.batch_norm(out, center=True, scale=True, is_training=True)

        return out

    def residual_block(inputs):
        # Pre-activation residual block: relu -> conv -> relu -> conv, plus
        # the identity skip connection.
        depth = inputs.get_shape()[-1].value

        out = tf.nn.relu(inputs)
        out = conv_layer(out, depth)
        out = tf.nn.relu(out)
        out = conv_layer(out, depth)
        return out + inputs

    def conv_sequence(inputs, depth):
        out = conv_layer(inputs, depth)
        out = tf.layers.max_pooling2d(out, pool_size=3, strides=2, padding='same')
        out = residual_block(out)
        out = residual_block(out)
        return out
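
    # Each conv_sequence halves the spatial resolution via the stride-2 pool,
    # so the default depths=[16, 32, 32] take a 64x64x3 observation (CoinRun's
    # native size) to 32x32x16, then 16x16x32, then 8x8x32 before flattening.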

    out = images
    for depth in depths:
        out = conv_sequence(out, depth)

    out = tf.layers.flatten(out)
    out = tf.nn.relu(out)
    out = tf.layers.dense(out, 256, activation=tf.nn.relu)

    return out, dropout_assign_ops
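

# Usage sketch (illustration only, not from the original file): build the
# tower once, then run dropout_assign_ops whenever fresh dropout masks are
# wanted; between assigns the masks stay fixed. The placeholder shape and the
# bare-session workflow are assumptions for this sketch, and Config is assumed
# to be initialized as in the repo's entry points.
if __name__ == '__main__':
    images_ph = tf.placeholder(tf.float32, [None, 64, 64, 3])
    features, dropout_assign_ops = impala_cnn(images_ph)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(dropout_assign_ops)  # re-randomize every stored mask
        feats = sess.run(features, {images_ph: np.zeros((8, 64, 64, 3), np.float32)})
        print(feats.shape)  # (8, 256)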