ma_policy/util.py (38 lines of code) (raw):

import numpy as np
import tensorflow as tf


def normc_initializer(std=1.0, axis=0):
    '''
        Initializer that draws from a standard normal and rescales so that each
        vector along `axis` has L2 norm equal to `std` (normalized-column init).
    '''
    def _initializer(shape, dtype=None, partition_info=None):  # pylint: disable=W0613
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=axis, keepdims=True))
        return tf.constant(out)
    return _initializer


def listdict2dictnp(l, keepdims=False):
    '''
        Convert a list of dicts of numpy arrays to a dict of numpy arrays.
        If keepdims is False, the new outer dimension in each dict element will be
            the length of the list.
        If keepdims is True, the new outer dimension in each dict element will be
            the sum of the outer dimensions of each item in the list.
    '''
    if keepdims:
        return {k: np.concatenate([d[k] for d in l]) for k in l[0]}
    else:
        return {k: np.array([d[k] for d in l]) for k in l[0]}


def shape_list(x):
    '''
        Deal with dynamic shape in tensorflow cleanly.
        Returns static dimensions where known and dynamic tensors otherwise.
    '''
    ps = x.get_shape().as_list()
    ts = tf.shape(x)
    return [ts[i] if ps[i] is None else ps[i] for i in range(len(ps))]


def l2_loss(pred, label, std, mask):
    '''
        Masked L2 loss with a scaling parameter (std).
        We made the choice that the loss scales with the number of unmasked data
        points rather than having the same magnitude regardless of how many
        samples came in.
        TODO: Revisit whether this is the right choice.
    '''
    if mask is None:
        return 0.5 * tf.reduce_mean(tf.square((pred - label) / std))
    else:
        return 0.5 * tf.reduce_mean(mask * tf.square((pred - label) / std))
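
A minimal usage sketch for listdict2dictnp (not part of the original module): it assumes the file is importable as ma_policy.util and uses a hypothetical list of per-step rollout dicts to show how the two keepdims modes differ.

import numpy as np
from ma_policy.util import listdict2dictnp

steps = [{'obs': np.zeros((2, 3)), 'rew': np.array([0.0, 1.0])},
         {'obs': np.ones((2, 3)),  'rew': np.array([2.0, 3.0])}]

stacked = listdict2dictnp(steps)                 # keepdims=False (default): stack into a new axis
print(stacked['obs'].shape)                      # (2, 2, 3): new outer dim is len(steps)

merged = listdict2dictnp(steps, keepdims=True)   # concatenate along the existing outer axis
print(merged['obs'].shape)                       # (4, 3): sum of the outer dims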
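
A worked example of the scaling choice in l2_loss (a sketch, assuming the same ma_policy.util import path and TensorFlow 2 eager execution; under TF1 the tensors would be evaluated in a session): because reduce_mean divides by the total element count rather than by the mask sum, masking out half the points halves the loss instead of averaging only over the unmasked ones.

import tensorflow as tf
from ma_policy.util import l2_loss

pred = tf.constant([1.0, 2.0, 3.0, 4.0])
label = tf.zeros(4)
mask = tf.constant([1.0, 1.0, 0.0, 0.0])

print(l2_loss(pred, label, std=1.0, mask=None))  # 0.5 * mean([1, 4, 9, 16]) = 3.75
print(l2_loss(pred, label, std=1.0, mask=mask))  # 0.5 * mean([1, 4, 0, 0])  = 0.625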