in src/speech_reps/models/bertphone.py [0:0]
def __init__(self, attention_cell='multi_head', units=128,
             hidden_size=512, num_heads=4, scaled=True,
             dropout=0.0, use_residual=True, output_attention=False,
             weight_initializer=None, bias_initializer='zeros',
             prefix=None, params=None, activation='gelu', layer_norm_eps=None):
    """Construct a BERT encoder cell.

    Delegates entirely to the parent encoder cell, forwarding every
    user-facing argument unchanged and additionally forcing the
    BERT-specific configuration flags on.
    """
    # Fixed BERT-variant switches: the parent transformer cell is
    # configured with biased attention/projection layers and BERT-style
    # layer norm and feed-forward blocks.
    bert_overrides = dict(
        attention_use_bias=True,
        attention_proj_use_bias=True,
        use_bert_layer_norm=True,
        use_bert_ffn=True,
    )
    super(BERTEncoderCell, self).__init__(
        attention_cell=attention_cell,
        units=units,
        hidden_size=hidden_size,
        num_heads=num_heads,
        scaled=scaled,
        dropout=dropout,
        use_residual=use_residual,
        output_attention=output_attention,
        weight_initializer=weight_initializer,
        bias_initializer=bias_initializer,
        prefix=prefix,
        params=params,
        activation=activation,
        layer_norm_eps=layer_norm_eps,
        **bert_overrides)