in experiments/mnist_lossy.py [0:0]
def run_single_experiment(seq_length, seed):
    """Compress `seq_length` lossy-coded MNIST images both as a multiset and
    as an ordered sequence, and report the bits saved by discarding order,
    alongside the theoretical limit for those savings.
    """
    np.random.seed(seed)

    # Lossy-compress each MNIST test image with WebP at quality 0. Each
    # resulting symbol is a variable-length bytearray emitted by the codec.
    symbols = [compress_image_lossy(image, quality=0, method='WebP')
               for image in load_mnist()[:seq_length]]
    assert len(symbols) == seq_length

    # An ANS state is a compound structure (head + tail) that supports
    # parallel encode/decode provided the head shape covers the symbol shape.
    # The image size (784) upper-bounds each bytearray's length, so a head of
    # shape 784 guarantees the symbol codec can process any symbol (i.e. any
    # variable-length byte sequence) in a single operation.
    message = cs.base_message(784)
    symbol_codec = codecs.ByteArray(784)

    # Build the multiset via successive inserts, then encode it with the
    # multiset codec wrapped around the per-symbol codec.
    multiset = build_multiset(symbols)
    multiset_codec = codecs.Multiset(symbol_codec)
    (message,) = multiset_codec.encode(message, multiset)

    # Bits needed to store the images as an (unordered) multiset.
    compressed_length_multiset = calculate_state_bits(message)

    # Decode and verify the round-trip: sorted contents must match, since a
    # multiset carries no order information.
    message, multiset_decoded = multiset_codec.decode(message, seq_length)
    assert (np.sort(to_sequence(multiset))
            == np.sort(to_sequence(multiset_decoded))).all()

    # Bits needed when the same images are stored as an ordered sequence.
    message = cs.base_message(784)
    (message,) = codecs.Sequence(symbol_codec).encode(message, symbols)
    compressed_length_sequence = calculate_state_bits(message)

    # Savings from dropping order, and the theoretical ceiling: the log2
    # multinomial coefficient (information content of uniform permutations).
    saved_bits = compressed_length_sequence - compressed_length_multiset
    counts = np.unique(symbols, return_counts=True)[1]
    saved_bits_limit = log2_multinomial_coeff(counts)

    return {
        'seq_length': seq_length,
        'saved_bits': saved_bits,
        'saved_bits_limit': saved_bits_limit,
        'compressed_length_multiset': compressed_length_multiset,
        'compressed_length_sequence': compressed_length_sequence,
    }