in python/lltm_baseline.py [0:0]
    def forward(ctx, input, weights, bias, old_h, old_cell):
        """Compute the new hidden and cell state and stash tensors on ``ctx``.

        ``old_h`` and ``input`` are concatenated along dim 1 and pushed
        through ``F.linear`` with ``weights`` and ``bias``.  The result is
        chunked three ways along dim 1: chunk 0 and chunk 1 go through a
        sigmoid (input gate and output gate), chunk 2 goes through ELU
        (candidate cell).

        Args:
            ctx: Object exposing ``save_for_backward``; presumably the
                autograd context of the enclosing Function -- confirm
                against the class definition.
            input: Tensor concatenated after ``old_h`` along dim 1.
            weights: Weight matrix handed to ``F.linear``.
            bias: Bias handed to ``F.linear``.
            old_h: Previous hidden state, concatenated before ``input``.
            old_cell: Previous cell state, added to the gated candidate.

        Returns:
            A ``(new_h, new_cell)`` tuple where
            ``new_cell = old_cell + candidate_cell * input_gate`` and
            ``new_h = tanh(new_cell) * output_gate``.
        """
        # NOTE(review): assumes dim 1 of the linear output splits into
        # exactly three chunks -- confirm against the callers' weight shape.
        stacked = torch.cat((old_h, input), dim=1)
        pre_activations = F.linear(stacked, weights, bias)
        pieces = pre_activations.chunk(3, dim=1)

        in_gate = pieces[0].sigmoid()
        out_gate = pieces[1].sigmoid()
        candidate = F.elu(pieces[2])

        next_cell = old_cell + candidate * in_gate
        next_h = next_cell.tanh() * out_gate

        # The order of the saved tensors is part of the contract with
        # whatever later reads them back from ctx; keep it unchanged.
        ctx.save_for_backward(
            stacked,
            weights,
            in_gate,
            out_gate,
            old_cell,
            next_cell,
            candidate,
            pre_activations,
        )
        return next_h, next_cell