def compute_gradients()

in tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py


    def compute_gradients(self,
                          loss,
                          var_list,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None,
                          gradient_tape=None):
      """DP-SGD version of base class method."""
      if callable(loss):
        # TF is running in eager mode (the loss was passed as a callable).
        raise NotImplementedError('Vectorized optimizer unavailable for TF2.')
      else:
        # TF is running in graph mode; check that we did not receive a gradient tape.
        if gradient_tape:
          raise ValueError('When in graph mode, a tape should not be passed.')

        batch_size = tf.shape(input=loss)[0]
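        # If num_microbatches was not specified, treat each example as its own
        # microbatch (per-example clipping).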
        if self._num_microbatches is None:
          self._num_microbatches = batch_size

        # Note: it would be closer to the correct i.i.d. sampling of records if
        # we sampled each microbatch from the appropriate binomial distribution,
        # although that still wouldn't be quite correct because it would be
        # sampling from the dataset without replacement.
        microbatch_losses = tf.reshape(loss, [self._num_microbatches, -1])

        if var_list is None:
          var_list = (
              tf.trainable_variables() +
              tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))

        def process_microbatch(microbatch_loss):
          """Compute clipped grads for one microbatch."""
          microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss)
          grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients(
              microbatch_loss, var_list, gate_gradients, aggregation_method,
              colocate_gradients_with_ops, grad_loss))
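          # Variables that do not affect this microbatch's loss get a None
          # gradient; replace those with zeros so the structure stays uniform.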
          grads_list = [
              g if g is not None else tf.zeros_like(v)
              for (g, v) in zip(list(grads), var_list)
          ]
          # Clip gradients to have L2 norm of l2_norm_clip.
          # Here, we use TF primitives rather than the built-in
          # tf.clip_by_global_norm() so that operations can be vectorized
          # across microbatches.
          grads_flat = tf.nest.flatten(grads_list)
          squared_l2_norms = [
              tf.reduce_sum(input_tensor=tf.square(g)) for g in grads_flat
          ]
          global_norm = tf.sqrt(tf.add_n(squared_l2_norms))
          div = tf.maximum(global_norm / self._l2_norm_clip, 1.)
          clipped_flat = [g / div for g in grads_flat]
          clipped_grads = tf.nest.pack_sequence_as(grads_list, clipped_flat)
          return clipped_grads

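        # Clip each microbatch's gradients in parallel; tf.vectorized_map stacks
        # the per-microbatch results along a new leading axis.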
        clipped_grads = tf.vectorized_map(process_microbatch, microbatch_losses)

        def reduce_noise_normalize_batch(stacked_grads):
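          """Sum clipped grads over microbatches, add Gaussian noise, normalize."""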
          summed_grads = tf.reduce_sum(input_tensor=stacked_grads, axis=0)
          noise_stddev = self._l2_norm_clip * self._noise_multiplier
          noise = tf.random.normal(
              tf.shape(input=summed_grads), stddev=noise_stddev)
          noised_grads = summed_grads + noise
          return noised_grads / tf.cast(self._num_microbatches, tf.float32)

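        # Apply the sum/noise/normalize step to every gradient tensor in the
        # (possibly nested) structure of clipped gradients.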
        final_grads = tf.nest.map_structure(reduce_noise_normalize_batch,
                                            clipped_grads)

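        # Return (gradient, variable) pairs, matching the base optimizer's
        # compute_gradients contract.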
        return list(zip(final_grads, var_list))
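
A minimal usage sketch, not taken from the source file: it assumes the module exports the VectorizedDPSGD wrapper around tf.compat.v1.train.GradientDescentOptimizer (as in TensorFlow Privacy releases that ship this file) and runs in TF1 graph mode. The model, placeholder shapes, and hyperparameter values below are illustrative only.

    import tensorflow.compat.v1 as tf
    from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized

    tf.disable_eager_execution()  # graph mode; a callable (eager) loss raises NotImplementedError

    features = tf.placeholder(tf.float32, [None, 28 * 28])
    labels = tf.placeholder(tf.int32, [None])
    logits = tf.layers.dense(features, 10)

    # Keep the loss as a per-example vector; compute_gradients reshapes it into
    # [num_microbatches, -1] itself, so it must not be reduced to a scalar here.
    vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)

    optimizer = dp_optimizer_vectorized.VectorizedDPSGD(
        l2_norm_clip=1.0,       # per-microbatch clipping norm
        noise_multiplier=1.1,   # noise stddev relative to the clipping norm
        num_microbatches=32,    # batches fed must be divisible by this
        learning_rate=0.15)     # forwarded to the wrapped GradientDescentOptimizer
    train_op = optimizer.minimize(loss=vector_loss)

Because the loss stays a vector, minimize() hands the per-example losses to the overridden compute_gradients above, which performs the microbatch clipping, noise addition, and averaging before applying the update.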