# CnnLstmSaModel.__init__
#
# Excerpt from iep/models/baselines.py


  def __init__(self, vocab,
               rnn_wordvec_dim=300, rnn_dim=256, rnn_num_layers=2, rnn_dropout=0,
               cnn_feat_dim=(1024,14,14),
               stacked_attn_dim=512, num_stacked_attn=2,
               fc_use_batchnorm=False, fc_dropout=0, fc_dims=(1024,)):
    """Build the CNN + LSTM + stacked-attention baseline model.

    Args:
      vocab: dict with 'question_token_to_idx' (for the question encoder)
        and 'answer_token_to_idx' (sizes the classifier output).
      rnn_wordvec_dim, rnn_dim, rnn_num_layers, rnn_dropout: LstmEncoder config.
      cnn_feat_dim: (C, H, W) of the precomputed CNN feature maps; only C is
        used here, to size the 1x1 projection.
      stacked_attn_dim, num_stacked_attn: StackedAttention config.
      fc_use_batchnorm, fc_dropout, fc_dims: final MLP classifier config.
    """
    super(CnnLstmSaModel, self).__init__()

    # Question encoder: embeds question tokens and runs them through an LSTM.
    self.rnn = LstmEncoder(
        token_to_idx=vocab['question_token_to_idx'],
        wordvec_dim=rnn_wordvec_dim,
        rnn_dim=rnn_dim,
        rnn_num_layers=rnn_num_layers,
        rnn_dropout=rnn_dropout,
    )

    # 1x1 conv projects image features from feat_channels down to rnn_dim so
    # they can interact with the question encoding inside the attention stack.
    feat_channels, _feat_h, _feat_w = cnn_feat_dim
    self.image_proj = nn.Conv2d(feat_channels, rnn_dim, kernel_size=1, padding=0)

    # Attention layers are kept in a plain list for ordered iteration and
    # registered individually via add_module so their parameters are tracked.
    # NOTE: the 'stacked-attn-%d' names are part of the checkpoint format.
    self.stacked_attns = []
    for idx in range(num_stacked_attn):
      attn_layer = StackedAttention(rnn_dim, stacked_attn_dim)
      self.stacked_attns.append(attn_layer)
      self.add_module('stacked-attn-%d' % idx, attn_layer)

    # Final MLP maps the attended representation to answer-vocabulary logits.
    self.classifier = build_mlp(
        input_dim=rnn_dim,
        hidden_dims=fc_dims,
        output_dim=len(vocab['answer_token_to_idx']),
        use_batchnorm=fc_use_batchnorm,
        dropout=fc_dropout,
    )