in tensorflow_gan/python/eval/classifier_metrics.py
def kernel_classifier_distance_and_std_from_activations(activations1,
                                                         activations2,
                                                         max_block_size=1024,
                                                         dtype=None):
"""Kernel "classifier" distance for evaluating a generative model.
This methods computes the kernel classifier distance from activations of
real images and generated images. This can be used independently of the
kernel_classifier_distance() method, especially in the case of using large
batches during evaluation where we would like to precompute all of the
activations before computing the classifier distance, or if we want to
compute multiple metrics based on the same images. It also returns a rough
estimate of the standard error of the estimator.
This technique is described in detail in https://arxiv.org/abs/1801.01401.
Given two distributions P and Q of activations, this function calculates
E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
- 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
where k is the polynomial kernel
k(x, y) = ( x^T y / dimension + 1 )^3.
This captures how different the distributions of real and generated images'
visual features are. Like the Frechet distance (and unlike the Inception
score), this is a true distance and incorporates information about the
target images. Unlike the Frechet score, this function computes an
*unbiased* and asymptotically normal estimator, which makes comparing
estimates across models much more intuitive.
The estimator used takes time quadratic in max_block_size. Larger values of
max_block_size will decrease the variance of the estimator but increase the
computational cost. This differs slightly from the estimator used by the
original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
The estimate of the standard error will also be more reliable when there are
more blocks, i.e. when max_block_size is smaller.
NOTE: the blocking code assumes that real_activations and
generated_activations are both in random order. If either is sorted in a
meaningful order, the estimator will behave poorly.

  Args:
    activations1: 2D Tensor containing activations. Shape is
      [batch_size, activation_size].
    activations2: 2D Tensor containing activations. Shape is
      [batch_size, activation_size].
    max_block_size: integer, default 1024. The distance estimator splits
      samples into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of
      the standard error.
    dtype: If not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
    as the activations.
    An estimate of the standard error of the distance estimator (a scalar of
    the same type).
  """
  activations1.shape.assert_has_rank(2)
  activations2.shape.assert_has_rank(2)
  activations1.shape[1:2].assert_is_compatible_with(activations2.shape[1:2])

  if dtype is None:
    dtype = activations1.dtype
    assert activations2.dtype == dtype
  else:
    activations1 = tf.cast(activations1, dtype)
    activations2 = tf.cast(activations2, dtype)
  # Figure out how to split the activations into blocks of approximately
  # equal size, with none larger than max_block_size.
  n_r = tf.shape(input=activations1)[0]
  n_g = tf.shape(input=activations2)[0]
  n_bigger = tf.maximum(n_r, n_g)
  n_blocks = tf.cast(tf.math.ceil(n_bigger / max_block_size), dtype=tf.int32)

  v_r = n_r // n_blocks
  v_g = n_g // n_blocks
  n_plusone_r = n_r - v_r * n_blocks
  n_plusone_g = n_g - v_g * n_blocks
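  # The first (n_blocks - n_plusone) blocks hold v samples each; the remaining
  # n_plusone blocks hold v + 1, so every sample is used and block sizes
  # differ by at most one.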
  sizes_r = tf.concat([
      tf.fill([n_blocks - n_plusone_r], v_r),
      tf.fill([n_plusone_r], v_r + 1),
  ], 0)
  sizes_g = tf.concat([
      tf.fill([n_blocks - n_plusone_g], v_g),
      tf.fill([n_plusone_g], v_g + 1),
  ], 0)
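  # Cumulative sums give the block boundaries: block i of activations1 is
  # activations1[inds_r[i]:inds_r[i + 1]], and likewise for activations2.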
  zero = tf.zeros([1], dtype=tf.int32)
  inds_r = tf.concat([zero, tf.cumsum(sizes_r)], 0)
  inds_g = tf.concat([zero, tf.cumsum(sizes_g)], 0)

  dim = tf.cast(activations1.shape[1], dtype)
  def compute_kid_block(i):
    """Computes the ith block of the KID estimate."""
    r_s = inds_r[i]
    r_e = inds_r[i + 1]
    r = activations1[r_s:r_e]
    m = tf.cast(r_e - r_s, dtype)

    g_s = inds_g[i]
    g_e = inds_g[i + 1]
    g = activations2[g_s:g_e]
    n = tf.cast(g_e - g_s, dtype)
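    # Degree-3 polynomial kernel, k(x, y) = (x^T y / dim + 1)^3, evaluated
    # within and across the two blocks.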
    k_rr = (tf.matmul(r, r, transpose_b=True) / dim + 1)**3
    k_rg = (tf.matmul(r, g, transpose_b=True) / dim + 1)**3
    k_gg = (tf.matmul(g, g, transpose_b=True) / dim + 1)**3
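    # Unbiased MMD^2 estimate for this block: subtracting the trace drops the
    # k(x, x) self-similarity terms, and dividing by m * (m - 1) rather than
    # m**2 averages over the off-diagonal pairs only.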
    return (-2 * tf.reduce_mean(input_tensor=k_rg) +
            (tf.reduce_sum(input_tensor=k_rr) - tf.linalg.trace(k_rr)) /
            (m * (m - 1)) +
            (tf.reduce_sum(input_tensor=k_gg) - tf.linalg.trace(k_gg)) /
            (n * (n - 1)))
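  # One KID estimate per block; the metric is not differentiated through, so
  # gradient bookkeeping is disabled.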
  ests = tf.map_fn(
      compute_kid_block, tf.range(n_blocks), dtype=dtype, back_prop=False)

  mn = tf.reduce_mean(input_tensor=ests)

  # tf.nn.moments doesn't use the Bessel correction, which we want here
  n_blocks_ = tf.cast(n_blocks, dtype)
  var = tf.cond(
      pred=tf.less_equal(n_blocks, 1),
      true_fn=lambda: tf.constant(float('nan'), dtype=dtype),
      false_fn=lambda: tf.reduce_sum(input_tensor=tf.square(ests - mn)) / (
          n_blocks_ - 1))

  return mn, tf.sqrt(var / n_blocks_)
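
# Minimal usage sketch (illustrative only, not part of the module): computing
# KID and its standard error from two sets of precomputed activations. The
# random tensors below are hypothetical stand-ins for activations that would
# normally come from a classifier such as Inception.
if __name__ == '__main__':
  import tensorflow as tf  # assumes TF2 eager execution

  real_acts = tf.random.normal([2048, 768])  # [batch_size, activation_size]
  fake_acts = tf.random.normal([2048, 768])

  # max_block_size=512 splits 2048 samples into 4 blocks of 512 each.
  kid, kid_stderr = kernel_classifier_distance_and_std_from_activations(
      real_acts, fake_acts, max_block_size=512)
  print('KID: %.6f +/- %.6f' % (kid.numpy(), kid_stderr.numpy()))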