in review_sentiment_model.py [0:0]
def forward(self, ids):
    """Compute predictions for a batch of token-id sequences.

    Runs ``ids`` through ``self.transformer``, takes the last-layer hidden
    state at sequence position 0, applies ``tanh`` and projects the result
    with ``self.fc``.

    Args:
        ids: Token ids, [batch size, seq len].

    Returns:
        Prediction tensor, [batch size, output dim].
    """
    # ids = [batch size, seq len]
    # Attention maps are not requested: nothing in this method reads or
    # returns them, so asking the backbone for them is wasted work.
    output = self.transformer(ids)
    hidden = output.last_hidden_state
    # hidden = [batch size, seq len, hidden dim]
    # Only position 0 is used as the sequence representation
    # (presumably the [CLS] token -- confirm against the tokenizer).
    cls_hidden = hidden[:, 0, :]
    # cls_hidden = [batch size, hidden dim]
    prediction = self.fc(torch.tanh(cls_hidden))
    # prediction = [batch size, output dim]
    return prediction