easy_rec/python/layers/variational_dropout_layer.py

# -*- encoding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import json

import numpy as np
import tensorflow as tf

from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn  # NOQA
from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn  # NOQA

if tf.__version__ >= '2.0':
  tf = tf.compat.v1


class VariationalDropoutLayer(object):
  """Rank features by variational dropout.

  Applies the dropout concept to the input feature layer and learns a
  feature-wise dropout rate, which can then be used to rank features.

  paper: Dropout Feature Ranking for Deep Learning Models
  arXiv: 1712.08645
  """

  def __init__(self,
               variational_dropout_config,
               features_dimension,
               is_training=False,
               name=''):
    self._config = variational_dropout_config
    self.features_dimension = features_dimension
    self.features_total_dimension = sum(self.features_dimension.values())

    if self.variational_dropout_wise():
      # one dropout parameter per embedding dimension
      self._dropout_param_size = self.features_total_dimension
    else:
      # one dropout parameter per feature
      self._dropout_param_size = len(self.features_dimension)
    self.drop_param_shape = [self._dropout_param_size]

    self.evaluate = not is_training

    logit_p_name = 'logit_p' if name == 'all' else 'logit_p_%s' % name
    self.logit_p = tf.get_variable(
        name=logit_p_name,
        shape=self.drop_param_shape,
        dtype=tf.float32,
        initializer=None)
    tf.add_to_collection(
        'variational_dropout',
        json.dumps([name, list(self.features_dimension.items())]))

  def get_lambda(self):
    return self._config.regularization_lambda

  def variational_dropout_wise(self):
    return self._config.embedding_wise_variational_dropout

  def build_expand_index(self, batch_size):
    # Build per-dimension feature indices, then prepend the batch index,
    # yielding an index list like [[0,0],[0,0],...,[0,1],...] for gather_nd.
    expanded_index = []
    for i, index_loop_count in enumerate(self.features_dimension.values()):
      for _ in range(index_loop_count):
        expanded_index.append([i])
    expanded_index = tf.tile(expanded_index, [batch_size, 1])

    batch_size_range = tf.range(batch_size)
    expand_range_axis = tf.expand_dims(batch_size_range, 1)
    batch_size_range_expand_dim_len = tf.tile(
        expand_range_axis, [1, self.features_total_dimension])
    index_i = tf.reshape(batch_size_range_expand_dim_len, [-1, 1])
    expanded_index = tf.concat([index_i, expanded_index], 1)
    return expanded_index

  def sample_noisy_input(self, input):
    batch_size = tf.shape(input)[0]
    if self.evaluate:
      # at evaluation time, scale deterministically by the keep probability
      expanded_dims_logit_p = tf.expand_dims(self.logit_p, 0)
      expanded_logit_p = tf.tile(expanded_dims_logit_p, [batch_size, 1])
      p = tf.sigmoid(expanded_logit_p)
      if self.variational_dropout_wise():
        scaled_input = input * (1 - p)
      else:
        # expand dropout layer from per-feature to per-dimension
        expanded_index = self.build_expand_index(batch_size)
        expanded_p = tf.gather_nd(p, expanded_index)
        expanded_p = tf.reshape(expanded_p,
                                [-1, self.features_total_dimension])
        scaled_input = input * (1 - expanded_p)
      return scaled_input
    else:
      bern_val = self.sampled_from_logit_p(batch_size)
      bern_val = tf.reshape(bern_val, [-1, self.features_total_dimension])
      noisy_input = input * bern_val
      return noisy_input

  def sampled_from_logit_p(self, num_samples):
    expand_dims_logit_p = tf.expand_dims(self.logit_p, 0)
    expand_logit_p = tf.tile(expand_dims_logit_p, [num_samples, 1])
    dropout_p = tf.sigmoid(expand_logit_p)
    bern_val = self.concrete_dropout_neuron(dropout_p)
    if self.variational_dropout_wise():
      return bern_val
    else:
      # from feature_num to embedding_dim_num
      expanded_index = self.build_expand_index(num_samples)
      bern_val_gather_nd = tf.gather_nd(bern_val, expanded_index)
      return bern_val_gather_nd
  def concrete_dropout_neuron(self, dropout_p, temp=1.0 / 10.0):
    # Concrete (relaxed Bernoulli) dropout gate: produces a value in (0, 1)
    # that approaches a hard 0/1 sample as temp -> 0 while staying
    # differentiable with respect to dropout_p.
    EPSILON = np.finfo(float).eps
    unif_noise = tf.random_uniform(
        tf.shape(dropout_p), dtype=tf.float32, seed=None, name='unif_noise')
    approx = (
        tf.log(dropout_p + EPSILON) - tf.log(1. - dropout_p + EPSILON) +
        tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON))
    approx_output = tf.sigmoid(approx / temp)
    return 1 - approx_output

  def __call__(self, output_features):
    batch_size = tf.shape(output_features)[0]
    noisy_input = self.sample_noisy_input(output_features)
    dropout_p = tf.sigmoid(self.logit_p)
    # Penalize the keep probabilities (1 - p): raising a dropout rate is
    # "free", so only features that reduce the task loss keep a low p.
    variational_dropout_penalty = 1. - dropout_p
    variational_dropout_penalty_lambda = self.get_lambda() / tf.cast(
        batch_size, dtype=tf.float32)
    variational_dropout_loss_sum = variational_dropout_penalty_lambda * tf.reduce_sum(
        variational_dropout_penalty, axis=0)
    tf.add_to_collection('variational_dropout_loss',
                         variational_dropout_loss_sum)
    return noisy_input
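
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the EasyRec pipeline): a
# minimal example of wiring the layer into a TF1-style graph. `_DemoConfig`
# is a hypothetical stand-in for the variational dropout protobuf config, of
# which the layer only reads `regularization_lambda` and
# `embedding_wise_variational_dropout`; the feature names and embedding
# dimensions below are made up for the demo.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
  if tf.__version__ >= '2.0':
    tf.disable_eager_execution()  # the layer builds a static graph

  class _DemoConfig(object):
    regularization_lambda = 0.01  # assumed value, for illustration
    embedding_wise_variational_dropout = False  # one rate per feature

  # feature name -> embedding dimension; the input is their concatenation
  features_dimension = {'user_id': 16, 'item_id': 16, 'age': 8}
  inputs = tf.placeholder(tf.float32,
                          [None, sum(features_dimension.values())])
  layer = VariationalDropoutLayer(
      _DemoConfig(), features_dimension, is_training=True, name='demo')
  noisy = layer(inputs)  # also adds the penalty to the loss collection
  dropout_loss = tf.add_n(tf.get_collection('variational_dropout_loss'))

  # Feature ranking readout: a lower learned dropout probability marks a
  # feature the model could not afford to drop, i.e. a more important one.
  learned_p = tf.sigmoid(layer.logit_p)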