def kernel_classifier_distance_and_std_from_activations()

in tensorflow_gan/python/eval/classifier_metrics.py [0:0]


def kernel_classifier_distance_and_std_from_activations(activations1,
                                                        activations2,
                                                        max_block_size=1024,
                                                        dtype=None):
  """Estimates the kernel classifier distance and its standard error.

  Works directly on precomputed activations, so it can be used without
  kernel_classifier_distance() -- for example when evaluation batches are large
  and all activations are computed up front, or when several metrics are
  derived from the same images.

  The technique is described in https://arxiv.org/abs/1801.01401. For two
  activation distributions P and Q the quantity being estimated is

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  with the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  The result measures how far apart the two sets of visual features are. Like
  the Frechet distance (and unlike the Inception score) it is a true distance
  and uses information about the target images. Unlike the Frechet score, the
  estimator is *unbiased* and asymptotically normal, which makes estimates
  easier to compare across models.

  The samples are split into blocks, one estimate is computed per block, and
  the block estimates are averaged. This is the block estimator of
  https://arxiv.org/abs/1307.1954, which differs slightly from the estimator in
  the original paper. Cost is quadratic in max_block_size: larger blocks lower
  the variance of the distance estimate but cost more, while smaller blocks
  (i.e. more of them) make the standard-error estimate more reliable.

  NOTE: the blocking assumes that activations1 and activations2 are both in
  random order. If either is sorted in a meaningful order, the estimator will
  behave poorly.

  Args:
    activations1: 2D Tensor containing activations. Shape is
      [batch_size, activation_size].
    activations2: 2D Tensor containing activations. Shape is
      [batch_size, activation_size].
    max_block_size: integer, default 1024. Upper bound on the number of samples
      per block. Larger values are more computationally expensive but decrease
      the variance of the distance estimate; smaller values give a better
      estimate of the standard error.
    dtype: If not None, coerce activations to this dtype before computations.
      If None, the dtype of activations1 is used and activations2 must already
      have the same dtype.

  Returns:
   The Kernel Inception Distance. A floating-point scalar of the same type
     as the output of the activations.
   An estimate of the standard error of the distance estimator (a scalar of
     the same type). It is NaN when there is only a single block.
  """
  activations1.shape.assert_has_rank(2)
  activations2.shape.assert_has_rank(2)
  activations1.shape[1:2].assert_is_compatible_with(activations2.shape[1:2])

  if dtype is not None:
    activations1 = tf.cast(activations1, dtype)
    activations2 = tf.cast(activations2, dtype)
  else:
    dtype = activations1.dtype
    assert activations2.dtype == dtype

  count1 = tf.shape(input=activations1)[0]
  count2 = tf.shape(input=activations2)[0]

  # The larger input decides how many blocks are needed so that no block
  # exceeds max_block_size; both inputs are then cut into that many blocks.
  num_blocks = tf.cast(
      tf.math.ceil(tf.maximum(count1, count2) / max_block_size),
      dtype=tf.int32)

  def block_offsets(count):
    """Returns the num_blocks + 1 row offsets delimiting near-equal blocks."""
    base_size = count // num_blocks
    # The remainder is spread by making the trailing blocks one row larger.
    num_larger = count - base_size * num_blocks
    block_sizes = tf.concat([
        tf.fill([num_blocks - num_larger], base_size),
        tf.fill([num_larger], base_size + 1),
    ], 0)
    leading_zero = tf.zeros([1], dtype=tf.int32)
    return tf.concat([leading_zero, tf.cumsum(block_sizes)], 0)

  offsets1 = block_offsets(count1)
  offsets2 = block_offsets(count2)

  feature_dim = tf.cast(activations1.shape[1], dtype)

  def cubic_kernel(a, b):
    """Evaluates k(x, y) = (x^T y / dimension + 1)^3 for all row pairs."""
    return (tf.matmul(a, b, transpose_b=True) / feature_dim + 1)**3

  def off_diagonal_mean(gram, size):
    """Averages the off-diagonal entries of a square kernel matrix."""
    off_diagonal_sum = (
        tf.reduce_sum(input_tensor=gram) - tf.linalg.trace(gram))
    return off_diagonal_sum / (size * (size - 1))

  def block_estimate(block):
    """Computes the distance estimate for a single block index."""
    start1 = offsets1[block]
    stop1 = offsets1[block + 1]
    start2 = offsets2[block]
    stop2 = offsets2[block + 1]

    rows1 = activations1[start1:stop1]
    rows2 = activations2[start2:stop2]
    size1 = tf.cast(stop1 - start1, dtype)
    size2 = tf.cast(stop2 - start2, dtype)

    cross_term = -2 * tf.reduce_mean(input_tensor=cubic_kernel(rows1, rows2))
    within1 = off_diagonal_mean(cubic_kernel(rows1, rows1), size1)
    within2 = off_diagonal_mean(cubic_kernel(rows2, rows2), size2)
    return cross_term + within1 + within2

  block_estimates = tf.map_fn(
      block_estimate, tf.range(num_blocks), dtype=dtype, back_prop=False)

  mean_estimate = tf.reduce_mean(input_tensor=block_estimates)

  # The variance is computed by hand because tf.nn.moments does not apply the
  # Bessel correction, which is wanted here.
  num_blocks_float = tf.cast(num_blocks, dtype)

  def undefined_variance():
    """A single block gives no spread to measure, so the variance is NaN."""
    return tf.constant(float('nan'), dtype=dtype)

  def sample_variance():
    """Bessel-corrected variance of the per-block estimates."""
    squared_deviations = tf.square(block_estimates - mean_estimate)
    return tf.reduce_sum(input_tensor=squared_deviations) / (
        num_blocks_float - 1)

  variance = tf.cond(
      pred=tf.less_equal(num_blocks, 1),
      true_fn=undefined_variance,
      false_fn=sample_variance)

  return mean_estimate, tf.sqrt(variance / num_blocks_float)