in src/speech_reps/models/decoar.py [0:0]
def __init__(self, input_size: int = 80, hidden_size: int = 1024,
             num_layers: int = 4) -> None:
    """
    Build the input projection and the two unidirectional LSTM stacks.

    Calling the constructor with no arguments creates exactly the modules
    that used to be hard-coded here (80 -> 1024 projection, two 4-layer
    LSTMs of width 1024).

    input_size: an int indicating the input feature size, e.g., 80 for Mel.
    hidden_size: an int indicating the projection output size, which is
        also used as both the input size and the hidden size of each LSTM.
    num_layers: an int indicating the number of layers in each LSTM.
    """
    super(DeCoAR, self).__init__()

    # Stored under their original attribute names so that any other code
    # reading `self.embed` / `self.encoder_layers` keeps working.
    self.embed = input_size
    self.encoder_layers = num_layers

    # Modules are created in the same order as before, which keeps the
    # registration order (and hence the state_dict key order) unchanged.
    # NOTE(review): non-default sizes change the parameter shapes, so
    # weights saved with the default sizes would presumably not load.
    self.post_extract_proj = nn.Linear(self.embed, hidden_size)

    # Two separate, identically configured, non-bidirectional LSTMs.
    # NOTE(review): the names suggest one reads the sequence left-to-right
    # and the other right-to-left; confirm against forward().
    self.forward_lstm = nn.LSTM(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=self.encoder_layers,
        batch_first=True,
        bidirectional=False,
    )
    self.backward_lstm = nn.LSTM(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=self.encoder_layers,
        batch_first=True,
        bidirectional=False,
    )