# -*- encoding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import json
import numpy as np
import tensorflow as tf
from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA
from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA

if tf.__version__ >= '2.0':
  tf = tf.compat.v1


class VariationalDropoutLayer(object):
"""Rank features by variational dropout.
Use the Dropout concept on the input feature layer and optimize the corresponding feature-wise dropout rate
paper: Dropout Feature Ranking for Deep Learning Models
arXiv: 1712.08645
"""
def __init__(self,
variational_dropout_config,
features_dimension,
is_training=False,
name=''):
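    """Initialize the layer.

    Args:
      variational_dropout_config: config object providing the
        regularization_lambda and embedding_wise_variational_dropout fields.
      features_dimension: ordered mapping from feature name to its embedding
        dimension; the iteration order defines the feature index order.
      is_training: whether the graph is built for training.
      name: feature group name, used to suffix the dropout variable name.
    """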
self._config = variational_dropout_config
self.features_dimension = features_dimension
self.features_total_dimension = sum(self.features_dimension.values())
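    # With embedding-wise dropout there is one dropout parameter per
    # embedding dimension; otherwise there is one parameter per feature.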
    if self.variational_dropout_wise():
      self._dropout_param_size = self.features_total_dimension
    else:
      self._dropout_param_size = len(self.features_dimension)
    self.drop_param_shape = [self._dropout_param_size]
self.evaluate = not is_training
logit_p_name = 'logit_p' if name == 'all' else 'logit_p_%s' % name
self.logit_p = tf.get_variable(
name=logit_p_name,
shape=self.drop_param_shape,
dtype=tf.float32,
initializer=None)
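    # Record the layer name and the feature dimensions in a collection so the
    # learned dropout rates can be associated with feature names later.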
tf.add_to_collection(
'variational_dropout',
json.dumps([name, list(self.features_dimension.items())]))

  def get_lambda(self):
    return self._config.regularization_lambda

  def variational_dropout_wise(self):
    return self._config.embedding_wise_variational_dropout

  def build_expand_index(self, batch_size):
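    """Build gather_nd indices that map each embedding dim to its feature.

    Returns an int tensor of shape [batch_size * total_dim, 2] where each
    row is (batch_index, feature_index).
    """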
    # Repeat each feature's index once per embedding dimension, e.g. feature
    # dims [4, 2] -> [[0], [0], [0], [0], [1], [1]].
expanded_index = []
for i, index_loop_count in enumerate(self.features_dimension.values()):
for m in range(index_loop_count):
expanded_index.append([i])
expanded_index = tf.tile(expanded_index, [batch_size, 1])
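    # Build a matching column of batch indices ([[0], [0], ..., [1], ...]),
    # one entry per embedding dimension per example.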
batch_size_range = tf.range(batch_size)
expand_range_axis = tf.expand_dims(batch_size_range, 1)
batch_size_range_expand_dim_len = tf.tile(
expand_range_axis, [1, self.features_total_dimension])
index_i = tf.reshape(batch_size_range_expand_dim_len, [-1, 1])
expanded_index = tf.concat([index_i, expanded_index], 1)
return expanded_index

  def sample_noisy_input(self, input):
batch_size = tf.shape(input)[0]
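    # At evaluation time, scale the input by the expected keep probability
    # (1 - p); at training time, multiply by a sampled relaxed dropout mask.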
if self.evaluate:
expanded_dims_logit_p = tf.expand_dims(self.logit_p, 0)
expanded_logit_p = tf.tile(expanded_dims_logit_p, [batch_size, 1])
p = tf.sigmoid(expanded_logit_p)
if self.variational_dropout_wise():
scaled_input = input * (1 - p)
else:
        # Broadcast the per-feature keep probability across each feature's
        # embedding dimensions.
expanded_index = self.build_expand_index(batch_size)
expanded_p = tf.gather_nd(p, expanded_index)
expanded_p = tf.reshape(expanded_p, [-1, self.features_total_dimension])
scaled_input = input * (1 - expanded_p)
return scaled_input
else:
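      # Sample a concrete-relaxation Bernoulli mask per example and apply it.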
bern_val = self.sampled_from_logit_p(batch_size)
bern_val = tf.reshape(bern_val, [-1, self.features_total_dimension])
noisy_input = input * bern_val
return noisy_input

  def sampled_from_logit_p(self, num_samples):
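    """Sample relaxed keep-mask values from the dropout logits for `num_samples` examples."""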
expand_dims_logit_p = tf.expand_dims(self.logit_p, 0)
expand_logit_p = tf.tile(expand_dims_logit_p, [num_samples, 1])
dropout_p = tf.sigmoid(expand_logit_p)
bern_val = self.concrete_dropout_neuron(dropout_p)
if self.variational_dropout_wise():
return bern_val
else:
      # Expand the per-feature mask so that all embedding dimensions of a
      # feature share the same mask value.
expanded_index = self.build_expand_index(num_samples)
bern_val_gather_nd = tf.gather_nd(bern_val, expanded_index)
return bern_val_gather_nd

  def concrete_dropout_neuron(self, dropout_p, temp=1.0 / 10.0):
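    """Concrete (Gumbel-softmax style) relaxation of Bernoulli dropout.

    Computes 1 - sigmoid((logit(p) + logit(u)) / temp) for uniform noise u,
    which approaches a hard keep mask with keep probability 1 - p as
    temp -> 0.
    """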
EPSILON = np.finfo(float).eps
unif_noise = tf.random_uniform(
tf.shape(dropout_p), dtype=tf.float32, seed=None, name='unif_noise')
approx = (
tf.log(dropout_p + EPSILON) - tf.log(1. - dropout_p + EPSILON) +
tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON))
approx_output = tf.sigmoid(approx / temp)
return 1 - approx_output

  def __call__(self, output_features):
batch_size = tf.shape(output_features)[0]
noisy_input = self.sample_noisy_input(output_features)
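    # Regularization: penalize the keep probability (1 - p) summed over all
    # dropout parameters, scaled by lambda / batch_size. Minimizing it pushes
    # dropout rates up, so only features the prediction loss truly needs
    # keep a low dropout rate.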
dropout_p = tf.sigmoid(self.logit_p)
variational_dropout_penalty = 1. - dropout_p
variational_dropout_penalty_lambda = self.get_lambda() / tf.cast(
batch_size, dtype=tf.float32)
variational_dropout_loss_sum = variational_dropout_penalty_lambda * tf.reduce_sum(
variational_dropout_penalty, axis=0)
tf.add_to_collection('variational_dropout_loss',
variational_dropout_loss_sum)
return noisy_input
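
# A minimal usage sketch, not part of this module. `my_dropout_config` and
# `feature_layer` below are hypothetical placeholders; in EasyRec this layer
# is typically constructed from the model config rather than by hand.
#
#   from collections import OrderedDict
#   features_dimension = OrderedDict([('user_id', 16), ('item_id', 8)])
#   layer = VariationalDropoutLayer(
#       variational_dropout_config=my_dropout_config,
#       features_dimension=features_dimension,
#       is_training=True,
#       name='all')
#   # feature_layer: [batch_size, 24] concatenation of the embeddings above.
#   noisy_features = layer(feature_layer)  # same shape as feature_layer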