tf_agents/bandits/policies/boltzmann_reward_prediction_policy.py [167:184]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        time_step_spec, action_spec,
        policy_state_spec=reward_network.state_spec,
        clip=False,
        info_spec=info_spec,
        emit_log_probability='log_probability' in emit_policy_info,
        observation_and_action_constraint_splitter=(
            observation_and_action_constraint_splitter),
        name=name)

  @property
  def accepts_per_arm_features(self):
    return self._accepts_per_arm_features

  def _variables(self):
    policy_variables = self._reward_network.variables
    for c in self._constraints:
      policy_variables.append(c.variables)
    return policy_variables
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



tf_agents/bandits/policies/greedy_reward_prediction_policy.py [131:148]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        time_step_spec, action_spec,
        policy_state_spec=reward_network.state_spec,
        clip=False,
        info_spec=info_spec,
        emit_log_probability='log_probability' in emit_policy_info,
        observation_and_action_constraint_splitter=(
            observation_and_action_constraint_splitter),
        name=name)

  @property
  def accepts_per_arm_features(self):
    return self._accepts_per_arm_features

  def _variables(self):
    policy_variables = self._reward_network.variables
    for c in self._constraints:
      policy_variables.append(c.variables)
    return policy_variables
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
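Note: the two fragments above are byte-for-byte identical across the Boltzmann and greedy reward-prediction policies. Below is a minimal deduplication sketch, assuming a hypothetical `_RewardPredictionVariablesMixin` (not part of TF-Agents) that both policy classes could inherit. It also swaps the originals' `append(c.variables)` for `extend`; that change is an assumption on our part, since `append` nests each constraint's variable list inside the result rather than flattening it.

class _RewardPredictionVariablesMixin(object):
  """Hypothetical mixin capturing the duplicated accessors shown above."""

  @property
  def accepts_per_arm_features(self):
    return self._accepts_per_arm_features

  def _variables(self):
    # Copy so that mutating the result does not alias the network's own list.
    policy_variables = list(self._reward_network.variables)
    for c in self._constraints:
      # The duplicated originals call `append(c.variables)`, which nests each
      # constraint's variable list inside `policy_variables`. `extend` (an
      # assumption here, not the library's code) keeps the result a flat list
      # of variables.
      policy_variables.extend(c.variables)
    return policy_variables

Under this sketch, BoltzmannRewardPredictionPolicy and GreedyRewardPredictionPolicy would drop their local copies of the property and `_variables` and list the mixin ahead of their TFPolicy base class, so the shared bodies live in one place.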