_init_policy_out() — defined in ma_policy/ma_policy.py


    def _init_policy_out(self, pi, taken_actions):
        """Build per-action-key probability distributions and their derived ops.

        For every key in ``self.pdtypes`` this constructs the flat distribution
        parameters (``self.pdparams``), the distribution objects (``self.pds``),
        and the sampling / log-probability / entropy tensors.

        Args:
            pi: dict of policy-network outputs. ``pi["main"]`` is the shared
                trunk; per-key entries, when present, carry entity-specific
                outputs for that action key.
            taken_actions: dict mapping each action key to a tensor of actions
                actually taken, used to build ``self.taken_action_logp``.
        """
        with tf.variable_scope('policy_out'):
            self.pdparams = {}
            for key, pdtype in self.pdtypes.items():
                space = self.ac_space.spaces[key]
                with tf.variable_scope(key):
                    if self.gaussian_fixed_var and isinstance(space, gym.spaces.Box):
                        # Diagonal Gaussian with a state-independent log-std:
                        # the network predicts only the mean; logstd is a free
                        # variable shared across the batch.
                        half_dim = pdtype.param_shape()[0] // 2
                        mean = tf.layers.dense(pi["main"],
                                               half_dim,
                                               kernel_initializer=normc_initializer(0.01),
                                               activation=None)
                        logstd = tf.get_variable(name="logstd",
                                                 shape=[1, half_dim],
                                                 initializer=tf.zeros_initializer())
                        # "mean * 0.0 + logstd" broadcasts logstd to mean's shape.
                        self.pdparams[key] = tf.concat([mean, mean * 0.0 + logstd], axis=2)
                    elif key in pi:
                        # Entity-specific heads: the network already produced
                        # per-key outputs; adapt their shape to the pdtype.
                        if isinstance(space, (gym.spaces.Discrete)):
                            assert pi[key].get_shape()[-1] == 1
                            self.pdparams[key] = pi[key][..., 0]
                        elif isinstance(space, (gym.spaces.MultiDiscrete)):
                            assert np.prod(pi[key].get_shape()[-2:]) == pdtype.param_shape()[0],\
                                f"policy had shape {pi[key].get_shape()} for action {key}, but required {pdtype.param_shape()}"
                            # Collapse the trailing (entity, per-entity) dims into
                            # the single flat parameter dimension the pdtype expects.
                            flat_dim = np.prod(pi[key].get_shape()[-2:]).value
                            self.pdparams[key] = tf.reshape(pi[key],
                                                            shape=shape_list(pi[key])[:-2] + [flat_dim])
                        else:
                            assert False
                    else:
                        # Default head: fresh linear layer from the shared trunk
                        # to the distribution's flat parameter vector.
                        self.pdparams[key] = tf.layers.dense(pi["main"],
                                                             pdtype.param_shape()[0],
                                                             kernel_initializer=normc_initializer(0.01),
                                                             activation=None)

            with tf.variable_scope('pds'):
                self.pds = {}
                for key, pdtype in self.pdtypes.items():
                    self.pds[key] = pdtype.pdfromflat(self.pdparams[key])

            with tf.variable_scope('sampled_action'):
                self.sampled_action = {}
                for key, pd in self.pds.items():
                    # Stochastic policies sample; deterministic ones take the mode.
                    self.sampled_action[key] = pd.sample() if self.stochastic else pd.mode()
            with tf.variable_scope('sampled_action_logp'):
                self.sampled_action_logp = sum(self.pds[key].logp(self.sampled_action[key])
                                               for key in self.pdtypes)
            with tf.variable_scope('entropy'):
                self.entropy = sum(pd.entropy() for pd in self.pds.values())
            with tf.variable_scope('taken_action_logp'):
                self.taken_action_logp = sum(self.pds[key].logp(taken_actions[key])
                                             for key in self.pdtypes)