tf_agents/bandits/environments/drifting_linear_environment.py [155:191]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    self._current_observation_to_reward = tf.compat.v2.Variable(
        observation_to_reward_distribution.sample(),
        dtype=tf.float32,
        name='observation_to_reward')
    self._current_additive_reward = tf.compat.v2.Variable(
        additive_reward_distribution.sample(),
        dtype=tf.float32,
        name='additive_reward')

  @property
  def batch_size(self) -> types.Int:
    return tf.compat.dimension_value(
        self._observation_distribution.batch_shape[0])

  @property
  def observation_spec(self) -> types.TensorSpec:
    return tensor_spec.TensorSpec(
        shape=self._observation_distribution.batch_shape[1:],
        dtype=self._observation_distribution.dtype,
        name='observation_spec')

  @property
  def action_spec(self) -> types.BoundedTensorSpec:
    return tensor_spec.BoundedTensorSpec(
        shape=(),
        dtype=tf.int32,
        minimum=0,
        maximum=tf.compat.dimension_value(
            self._additive_reward_distribution.batch_shape[0]) - 1,
        name='action')

  def observation(self, unused_t) -> types.NestedTensor:
    return self._observation_distribution.sample()

  def reward(self,
             observation: types.NestedTensor,
             t: types.Int) -> types.NestedTensor:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
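Both excerpts break off at the `reward` signature, so the use of the two `tf.compat.v2.Variable`s created at the top is not shown here. For context only, a minimal sketch of the affine reward computation that linear bandit environments of this kind typically perform; the helper name `linear_reward` is hypothetical and this is not the body of either file:

import tensorflow as tf


def linear_reward(observation: tf.Tensor,
                  observation_to_reward: tf.Tensor,
                  additive_reward: tf.Tensor) -> tf.Tensor:
  """Hypothetical affine reward: one value per action for each observation.

  Assumed shapes:
    observation:           [batch_size, observation_dim]
    observation_to_reward: [observation_dim, num_actions]
    additive_reward:       [num_actions]
  Returns a [batch_size, num_actions] reward matrix.
  """
  return tf.matmul(observation, observation_to_reward) + additive_reward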



tf_agents/bandits/environments/piecewise_stochastic_environment.py [114:150]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    self._current_observation_to_reward = tf.compat.v2.Variable(
        observation_to_reward_distribution.sample(),
        dtype=tf.float32,
        name='observation_to_reward')
    self._current_additive_reward = tf.compat.v2.Variable(
        additive_reward_distribution.sample(),
        dtype=tf.float32,
        name='additive_reward')

  @property
  def batch_size(self) -> types.Int:
    return tf.compat.dimension_value(
        self._observation_distribution.batch_shape[0])

  @property
  def observation_spec(self) -> types.TensorSpec:
    return tensor_spec.TensorSpec(
        shape=self._observation_distribution.batch_shape[1:],
        dtype=self._observation_distribution.dtype,
        name='observation_spec')

  @property
  def action_spec(self) -> types.BoundedTensorSpec:
    return tensor_spec.BoundedTensorSpec(
        shape=(),
        dtype=tf.int32,
        minimum=0,
        maximum=tf.compat.dimension_value(
            self._additive_reward_distribution.batch_shape[0]) - 1,
        name='action')

  def observation(self, unused_t) -> types.NestedTensor:
    return self._observation_distribution.sample()

  def reward(self,
             observation: types.NestedTensor,
             t: types.Int) -> types.NestedTensor:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
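The two excerpts are identical: the spec properties and the `observation` sampler are duplicated verbatim between `drifting_linear_environment.py` and `piecewise_stochastic_environment.py`. A minimal refactoring sketch of how the shared block could be hoisted into one place follows; the mixin name `LinearRewardEnvironmentMixin` is hypothetical, and the sketch assumes both classes keep their distributions on `self._observation_distribution` and `self._additive_reward_distribution` as in the excerpts above:

import tensorflow as tf

from tf_agents.specs import tensor_spec
from tf_agents.typing import types


class LinearRewardEnvironmentMixin(object):
  """Hypothetical shared home for the spec/observation code duplicated above."""

  @property
  def batch_size(self) -> types.Int:
    # Leading batch dimension of the observation distribution.
    return tf.compat.dimension_value(
        self._observation_distribution.batch_shape[0])

  @property
  def observation_spec(self) -> types.TensorSpec:
    # Per-example observation shape and dtype, taken from the distribution.
    return tensor_spec.TensorSpec(
        shape=self._observation_distribution.batch_shape[1:],
        dtype=self._observation_distribution.dtype,
        name='observation_spec')

  @property
  def action_spec(self) -> types.BoundedTensorSpec:
    # One discrete action per arm; the arm count is the leading batch
    # dimension of the additive reward distribution.
    return tensor_spec.BoundedTensorSpec(
        shape=(),
        dtype=tf.int32,
        minimum=0,
        maximum=tf.compat.dimension_value(
            self._additive_reward_distribution.batch_shape[0]) - 1,
        name='action')

  def observation(self, unused_t) -> types.NestedTensor:
    return self._observation_distribution.sample()

The creation of the `observation_to_reward` and `additive_reward` variables could move into the same helper; each environment would then keep only its drift or regime-switching logic and its own `reward` implementation, which is where the two files actually differ.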



