in ludwig/encoders/sequence_encoders.py [0:0]
def __init__(
self,
should_embed=True,
vocab=None,
representation='dense',
embedding_size=256,
embeddings_trainable=True,
pretrained_embeddings=None,
embeddings_on_cpu=False,
conv_layers=None,
num_conv_layers=None,
filter_size=3,
num_filters=256,
pool_function='max',
pool_size=None,
fc_layers=None,
num_fc_layers=None,
fc_size=256,
use_bias=True,
weights_initializer='glorot_uniform',
bias_initializer='zeros',
weights_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
# weights_constraint=None,
# bias_constraint=None,
norm=None,
norm_params=None,
activation='relu',
dropout=0,
reduce_output='max',
**kwargs):
"""
:param should_embed: If True the input sequence is expected
to be made of integers and will be mapped into embeddings
:type should_embed: Boolean
:param vocab: Vocabulary of the input feature to encode
:type vocab: List
:param representation: the possible values are `dense` and `sparse`.
`dense` means the embeddings are initialized randomly,
`sparse` means they are initialized to be one-hot encodings.
:type representation: Str (one of 'dense' or 'sparse')
:param embedding_size: it is the maximum embedding size, the actual
size will be `min(vocabulary_size, embedding_size)`
for `dense` representations and exactly `vocabulary_size`
for the `sparse` encoding, where `vocabulary_size` is
the number of different strings appearing in the training set
in the column the feature is named after (plus 1 for `<UNK>`).
:type embedding_size: Integer
:param embeddings_trainable: If `True` embeddings are trained during
the training process, if `False` embeddings are fixed.
It may be useful when loading pretrained embeddings
to avoid fine-tuning them. This parameter has effect only
when `representation` is `dense`, as `sparse` one-hot encodings
are not trainable.
:type embeddings_trainable: Boolean
:param pretrained_embeddings: by default `dense` embeddings
are initialized randomly, but this parameter allows specifying
a path to a file containing embeddings in the GloVe format.
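(In the GloVe format, each line contains a token followed by its
space-separated vector values, e.g. `dog 0.21 -0.45 0.07 ...`;
the example values here are illustrative.)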
When the file containing the embeddings is loaded, only the
embeddings with labels present in the vocabulary are kept,
the others are discarded. If the vocabulary contains strings
that have no match in the embeddings file, their embeddings
are initialized with the average of all other embeddings plus
some random noise to make them different from each other.
This parameter has effect only if `representation` is `dense`.
:type pretrained_embeddings: Str (filepath)
:param embeddings_on_cpu: by default embedding matrices are stored
on GPU memory if a GPU is used, as it allows
for faster access, but in some cases the embedding matrix
may be really big and this parameter forces the placement
of the embedding matrix in regular memory and the CPU is used
to resolve them, slightly slowing down the process
as a result of data transfer between CPU and GPU memory.
:type embeddings_on_cpu: Boolean
:param conv_layers: it is a list of dictionaries containing
the parameters of all the convolutional layers. The length
of the list determines the number of parallel convolutional
layers and the content of each dictionary determines
the parameters for a specific layer. The available parameters
for each layer are: `filter_size`, `num_filters`, `pool`,
`norm`, `activation` and `regularize`. If any of those values
is missing from the dictionary, the default one specified
as a parameter of the encoder will be used instead. If both
`conv_layers` and `num_conv_layers` are `None`, a default
list will be assigned to `conv_layers` with the value
`[{filter_size: 2}, {filter_size: 3}, {filter_size: 4},
{filter_size: 5}]`.
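For example, `conv_layers=[{filter_size: 2, num_filters: 32},
{filter_size: 4}]` (illustrative values) defines two parallel
convolutional layers; each dictionary overrides only the listed
parameters and inherits the rest from the encoder defaults.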
:type conv_layers: List
:param num_conv_layers: if `conv_layers` is `None`, this is
the number of parallel convolutional layers.
:type num_conv_layers: Integer
:param filter_size: if a `filter_size` is not already specified in
`conv_layers` this is the default `filter_size` that
will be used for each layer. It indicates how wide is
the 1d convolutional filter.
:type filter_size: Integer
:param num_filters: if a `num_filters` is not already specified in
`conv_layers` this is the default `num_filters` that
will be used for each layer. It indicates the number
of filters, and by consequence the output channels of
the 1d convolution.
:type num_filters: Integer
:param pool_size: if a `pool_size` is not already specified
in `conv_layers` this is the default `pool_size` that
will be used for each layer. It indicates the size of
the pooling (controlled by `pool_function`) that will be
performed along the `s` sequence dimension after
the convolution operation.
:type pool_size: Integer
:param fc_layers: it is a list of dictionaries containing
the parameters of all the fully connected layers. The length
of the list determines the number of stacked fully connected
layers and the content of each dictionary determines
the parameters for a specific layer. The available parameters
for each layer are: `fc_size`, `norm`, `activation` and
`regularize`. If any of those values is missing from
the dictionary, the default one specified as a parameter of
the encoder will be used instead. If both `fc_layers` and
`num_fc_layers` are `None`, a default list will be assigned
to `fc_layers` with the value
`[{fc_size: 512}, {fc_size: 256}]`.
(only applies if `reduce_output` is not `None`).
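For example, `fc_layers=[{fc_size: 128}, {fc_size: 64, activation: tanh}]`
(illustrative values) stacks two fully connected layers, with any
unspecified parameters falling back to the encoder defaults.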
:type fc_layers: List
:param num_fc_layers: if `fc_layers` is `None`, this is the number
of stacked fully connected layers (only applies if
`reduce_output` is not `None`).
:type num_fc_layers: Integer
:param fc_size: if a `fc_size` is not already specified in
`fc_layers` this is the default `fc_size` that will be used
for each layer. It indicates the size of the output
of a fully connected layer.
:type fc_size: Integer
:param norm: if a `norm` is not already specified in `conv_layers`
or `fc_layers` this is the default `norm` that will be used
for each layer. It indicates the normalization applied
to the output of each layer.
:type norm: Str
:param activation: Default activation function to use
:type activation: Str
:param dropout: dropout rate applied to the embeddings and to
the convolutional and fully connected layers.
:type dropout: Float
:param weights_initializer: the initializer to use. If `None` it uses
`glorot_uniform`. Options are: `constant`, `identity`,
`zeros`, `ones`, `orthogonal`, `normal`, `uniform`,
`truncated_normal`, `variance_scaling`, `glorot_normal`,
`glorot_uniform`, `xavier_normal`, `xavier_uniform`,
`he_normal`, `he_uniform`, `lecun_normal`, `lecun_uniform`.
Alternatively it is possible to specify a dictionary with
a key `type` that identifies the type of initializer and
other keys for its parameters,
e.g. `{type: normal, mean: 0, stddev: 0}`.
To know the parameters of each initializer, please refer
to TensorFlow's documentation.
:type weights_initializer: Str
:param weights_regularizer: if a regularizer is not already specified
in `conv_layers` or `fc_layers` this is the default regularizer
that will be used for each layer. It indicates which
regularization function, if any, is applied to the layer
weights when computing a regularization loss.
:type weights_regularizer: Str
:param reduce_output: defines how to reduce the output tensor of
the convolutional layers along the `s` sequence length
dimension if the rank of the tensor is greater than 2.
Available values are: `sum`, `mean` or `avg`, `max`, `concat`
(concatenates along the first dimension), `last` (returns
the last vector of the first dimension) and `None` or `null`
(which does not reduce and returns the full tensor).
:type reduce_output: Str
"""
super(ParallelCNN, self).__init__()
logger.debug(' {}'.format(self.name))
if conv_layers is not None and num_conv_layers is None:
# use custom-defined layers
self.conv_layers = conv_layers
self.num_conv_layers = len(conv_layers)
elif conv_layers is None and num_conv_layers is not None:
# generate num_conv_layers with default parameters
self.conv_layers = None
self.num_conv_layers = num_conv_layers
elif conv_layers is None and num_conv_layers is None:
# use default layers with varying filter sizes
self.conv_layers = [
{'filter_size': 2},
{'filter_size': 3},
{'filter_size': 4},
{'filter_size': 5}
]
self.num_conv_layers = 4
else:
raise ValueError(
'Invalid layer parametrization, use either conv_layers or'
' num_conv_layers'
)
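# Illustrative parametrizations (hypothetical calls):
#   ParallelCNN(conv_layers=[{'filter_size': 7}])  # one custom layer
#   ParallelCNN(num_conv_layers=3)  # three layers with default params
#   ParallelCNN()  # four default layers, filter sizes 2 through 5
#   ParallelCNN(conv_layers=[...], num_conv_layers=2)  # ValueError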
# The user is expected to provide fc_layers or num_fc_layers.
# The following logic handles the cases where the user provides
# both (an error) or neither (defaults are used).
if fc_layers is None and num_fc_layers is None:
# use default fully connected layers with decreasing sizes
fc_layers = [
{'fc_size': 512},
{'fc_size': 256}
]
num_fc_layers = 2
elif fc_layers is not None and num_fc_layers is not None:
raise ValueError(
'Invalid layer parametrization, use either fc_layers or '
'num_fc_layers only. Not both.'
)
self.reduce_output = reduce_output
self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
self.should_embed = should_embed
self.embed_sequence = None
if self.should_embed:
logger.debug(' EmbedSequence')
self.embed_sequence = EmbedSequence(
vocab,
embedding_size,
representation=representation,
embeddings_trainable=embeddings_trainable,
pretrained_embeddings=pretrained_embeddings,
embeddings_on_cpu=embeddings_on_cpu,
dropout=dropout,
embedding_initializer=weights_initializer,
embedding_regularizer=weights_regularizer
)
logger.debug(' ParallelConv1D')
self.parallel_conv1d = ParallelConv1D(
layers=self.conv_layers,
default_num_filters=num_filters,
default_filter_size=filter_size,
default_use_bias=use_bias,
default_weights_initializer=weights_initializer,
default_bias_initializer=bias_initializer,
default_weights_regularizer=weights_regularizer,
default_bias_regularizer=bias_regularizer,
default_activity_regularizer=activity_regularizer,
# default_weights_constraint=None,
# default_bias_constraint=None,
default_norm=norm,
default_norm_params=norm_params,
default_activation=activation,
default_dropout=dropout,
default_pool_function=pool_function,
default_pool_size=pool_size,
default_pool_padding='same',
)
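# The FC stack is built only when the sequence output is reduced;
# with reduce_output=None the parallel conv output is returned
# without fully connected layers.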
if self.reduce_output is not None:
logger.debug(' FCStack')
self.fc_stack = FCStack(
layers=fc_layers,
num_layers=num_fc_layers,
default_fc_size=fc_size,
default_use_bias=use_bias,
default_weights_initializer=weights_initializer,
default_bias_initializer=bias_initializer,
default_weights_regularizer=weights_regularizer,
default_bias_regularizer=bias_regularizer,
default_activity_regularizer=activity_regularizer,
# default_weights_constraint=weights_constraint,
# default_bias_constraint=bias_constraint,
default_norm=norm,
default_norm_params=norm_params,
default_activation=activation,
default_dropout=dropout,
)
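A minimal usage sketch (not from the source): the vocabulary and
shapes below are illustrative, and the exact call signature and
output structure of the encoder may differ across Ludwig versions.

import tensorflow as tf
from ludwig.encoders.sequence_encoders import ParallelCNN

# Hypothetical vocabulary; in practice Ludwig builds it from the data.
vocab = ['<PAD>', '<UNK>', 'a', 'b', 'c']

encoder = ParallelCNN(
    vocab=vocab,
    embedding_size=64,
    conv_layers=[
        {'filter_size': 2, 'num_filters': 32},  # per-layer override
        {'filter_size': 4},  # inherits default num_filters
    ],
    num_fc_layers=1,
    fc_size=128,
    reduce_output='max',
)

# A batch of 2 integer-encoded sequences of length 10 (illustrative).
inputs = tf.random.uniform([2, 10], maxval=len(vocab), dtype=tf.int32)
# outputs = encoder(inputs, training=False)  # output structure depends
#                                            # on the Ludwig version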