tf_agents/bandits/environments/drifting_linear_environment.py [207:227]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return reward

  @gin.configurable
  def compute_optimal_reward(
      self, observation: types.NestedTensor) -> types.NestedTensor:
    deterministic_reward = tf.matmul(
        observation, self._current_observation_to_reward)
    optimal_action_reward = tf.reduce_max(deterministic_reward, axis=-1)
    return optimal_action_reward

  @gin.configurable
  def compute_optimal_action(
      self, observation: types.NestedTensor) -> types.NestedTensor:
    deterministic_reward = tf.matmul(
        observation, self._current_observation_to_reward)
    optimal_action = tf.argmax(
        deterministic_reward, axis=-1, output_type=tf.int32)
    return optimal_action


@gin.configurable
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
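
Both compute_optimal_reward and compute_optimal_action above assume a linear reward model: a [batch_size, observation_dim] observation batch multiplied by an [observation_dim, num_actions] observation-to-reward matrix gives per-arm expected rewards, from which the best achievable reward and the best arm are read off with reduce_max and argmax. The following is a minimal standalone sketch of that computation with made-up shapes and a hypothetical observation_to_reward constant; it is illustrative only and not part of either environment file.

    import tensorflow as tf

    # Hypothetical dimensions: 2 observations of size 3, 4 arms.
    observation = tf.constant([[1.0, 0.0, 2.0],
                               [0.5, 1.5, 0.0]])                 # [batch, obs_dim]
    observation_to_reward = tf.constant([[0.1, 0.9, 0.3, 0.0],
                                         [0.4, 0.2, 0.8, 0.1],
                                         [0.7, 0.0, 0.5, 0.6]])  # [obs_dim, num_actions]

    # Per-arm deterministic rewards, shape [batch, num_actions].
    deterministic_reward = tf.matmul(observation, observation_to_reward)
    # Best achievable reward and the arm attaining it, per batch element.
    optimal_reward = tf.reduce_max(deterministic_reward, axis=-1)
    optimal_action = tf.argmax(deterministic_reward, axis=-1, output_type=tf.int32)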



tf_agents/bandits/environments/piecewise_stochastic_environment.py [184:204]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return reward

  @gin.configurable
  def compute_optimal_reward(
      self, observation: types.NestedTensor) -> types.NestedTensor:
    deterministic_reward = tf.matmul(
        observation, self._current_observation_to_reward)
    optimal_action_reward = tf.reduce_max(deterministic_reward, axis=-1)
    return optimal_action_reward

  @gin.configurable
  def compute_optimal_action(
      self, observation: types.NestedTensor) -> types.NestedTensor:
    deterministic_reward = tf.matmul(
        observation, self._current_observation_to_reward)
    optimal_action = tf.argmax(
        deterministic_reward, axis=-1, output_type=tf.int32)
    return optimal_action


@gin.configurable
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
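
In TF-Agents bandit experiments, methods like these are typically consumed by the regret and suboptimal-arms metrics, which compare the agent's chosen actions against the environment's optimum. The sketch below assumes the tf_agents.bandits.metrics.tf_metrics module and an already constructed environment exposing the methods shown above; treat the exact wiring as illustrative rather than a verbatim excerpt from the library.

    from tf_agents.bandits.metrics import tf_metrics as bandit_metrics

    # `environment` is assumed to be an instance of one of the environments above.
    regret_metric = bandit_metrics.RegretMetric(environment.compute_optimal_reward)
    suboptimal_arms_metric = bandit_metrics.SuboptimalArmsMetric(
        environment.compute_optimal_action)

    # These metrics can then be passed to the training/eval loop alongside the
    # agent and environment, e.g. in the `metrics` list given to the bandit trainer.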



