def _build_placeholders()

in models_vd/model.py [0:0]


  def _build_placeholders(self, params):
    """Create all tf.placeholder inputs for the model graph.

    Args:
      params: dict of hyperparameters; keys read here include
        'max_enc_len', 'max_dec_len', 'num_layers', 'lstm_size',
        'h_feat', 'w_feat', 'd_feat', 'num_rounds', 'text_embed_size',
        'supervise_attention', and optionally 'num_options' (number of
        answer options for discriminative decoding; defaults to the
        previously hard-coded 100). 'train_mode' is read from
        self.params (see NOTE below).

    Returns:
      inputs: dict mapping input names to tf.placeholder tensors.
    """
    inputs = {}

    # Phase 1 - program generation.
    # NOTE(review): the question is time-major ([max_enc_len, None]) while
    # the caption below is batch-major ([None, max_enc_len]) — presumably
    # intentional, matching the respective encoders; confirm with callers.
    size = [params['max_enc_len'], None]
    inputs['ques'] = tf.placeholder(tf.int32, size, 'ques')
    inputs['ques_len'] = tf.placeholder(tf.int32, [None], 'ques_len')
    inputs['prog_gt'] = tf.placeholder(tf.int32, [None, None], 'prog')

    size = [None, params['max_enc_len']]
    inputs['cap'] = tf.placeholder(tf.int32, size, 'caption')
    inputs['cap_len'] = tf.placeholder(tf.int32, [None], 'cap_len')
    inputs['cap_prog_gt'] = tf.placeholder(tf.int32, [None, None],
                                           'cap_prog_gt')

    # Mask for pairwise program token loss.
    inputs['prog_att_mask'] = tf.placeholder(tf.float32, [None, None, None],
                                             'mask')
    # Placeholders for attention supervision.
    if params['supervise_attention']:
      size = [params['max_dec_len'], params['max_enc_len'], None, 1]
      inputs['prog_att_gt'] = tf.placeholder(tf.float32, size, 'gt_att')
      # NOTE(review): the name 'cap_att' is reused by 'cap_attended' in the
      # evaluation branch below; TF will uniquify the second occurrence
      # (e.g. 'cap_att_1') when both branches are active.
      inputs['cap_att_gt'] = tf.placeholder(tf.float32, size, 'cap_att')
      # Masks restricting the supervision loss to relevant positions.
      inputs['ques_super_mask'] = tf.placeholder(tf.float32, size, 'q_mask')
      inputs['cap_super_mask'] = tf.placeholder(tf.float32, size, 'c_mask')
      inputs['supervise_switch'] = tf.placeholder(tf.bool, [],
                                                  'supervise_switch')

    # Initial (h, c) state used to tie the encoder and decoder.
    size = [params['num_layers'], None, params['lstm_size']]
    inputs['enc_dec_h'] = tf.placeholder(tf.float32, size, 'enc_dec_h')
    inputs['enc_dec_c'] = tf.placeholder(tf.float32, size, 'enc_dec_c')

    # Phase 2 - program execution.
    size = [None, params['h_feat'], params['w_feat'], params['d_feat']]
    inputs['img_feat'] = tf.placeholder(tf.float32, size, 'img_feat')
    # Fix: give the validity placeholders explicit names like every other
    # input (they were previously auto-named 'Placeholder*').
    inputs['prog_validity'] = tf.placeholder(tf.bool, [None], 'prog_validity')

    # Phase 2.5 - caption execution.
    inputs['align_gt'] = tf.placeholder(tf.int32, [None], 'align_cap')
    inputs['prog_validity_cap'] = tf.placeholder(tf.bool, [None],
                                                 'prog_validity_cap')

    # Phase 3 - answer generation.
    inputs['ans_in'] = tf.placeholder(tf.int32, [None, None], 'ans_in')
    inputs['ans_out'] = tf.placeholder(tf.int32, [None, None], 'ans_out')
    inputs['ans'] = tf.placeholder(tf.int32, [None, None], 'ans')
    inputs['ans_len'] = tf.placeholder(tf.int32, [None], 'ans_len')

    # If discriminative, encode answer options.
    # Generalized: the option count is now configurable via
    # params['num_options']; defaults to the previously hard-coded 100.
    num_options = params.get('num_options', 100)
    size = [None, params['max_enc_len'], num_options]
    inputs['opt'] = tf.placeholder(tf.int32, size, 'opt_out')
    inputs['opt_len'] = tf.placeholder(tf.int32, [None, num_options], 'opt_len')
    inputs['gt_ind'] = tf.placeholder(tf.int32, [None], 'gt_ind')

    # Dialog history; 2 * max_enc_len per round — presumably a concatenated
    # question-answer pair (TODO confirm against the data loader).
    size = [None, params['num_rounds'], 2 * params['max_enc_len']]
    inputs['hist'] = tf.placeholder(tf.int32, size, 'history')
    size = [None, params['num_rounds']]
    inputs['hist_len'] = tf.placeholder(tf.int32, size, 'hist_len')

    # Placeholders for the fact.
    size = [None, params['max_enc_len']]
    inputs['fact'] = tf.placeholder(tf.int32, size, 'fact')
    inputs['fact_len'] = tf.placeholder(tf.int32, [None], 'fact_len')

    # NOTE(review): this reads self.params while the rest of the method uses
    # the params argument — kept as-is since equivalence of the two dicts
    # cannot be verified from this method alone; confirm at the call site.
    if not self.params['train_mode']:
      # Additional placeholders used only during evaluation.
      size = [None, params['lstm_size']]
      inputs['context'] = tf.placeholder(tf.float32, size, 'context')
      size = [1, 1, None, params['lstm_size']]
      inputs['cap_enc'] = tf.placeholder(tf.float32, size, 'cap_enc')
      size = [None, None, None, params['lstm_size']]
      inputs['ques_enc'] = tf.placeholder(tf.float32, size, 'ques_enc')
      size = [None, params['lstm_size']]
      inputs['hist_enc'] = tf.placeholder(tf.float32, size, 'hist_enc')
      size = [params['max_dec_len'], None, params['text_embed_size']]
      inputs['ques_attended'] = tf.placeholder(tf.float32, size, 'ques_att')
      inputs['cap_attended'] = tf.placeholder(tf.float32, size, 'cap_att')

    return inputs