in models_vd/model.py [0:0]
def _build_placeholders(self, params):
    """Create every tf.placeholder the model graph consumes.

    Args:
        params: dict of hyperparameters. Keys read here:
            'max_enc_len', 'max_dec_len', 'num_layers', 'lstm_size',
            'h_feat', 'w_feat', 'd_feat', 'num_rounds',
            'text_embed_size', 'supervise_attention', and optionally
            'num_options' (number of answer options per question,
            defaults to 100 for backward compatibility).

    Returns:
        dict mapping input names (str) to tf.placeholder tensors.
    """
    inputs = {}
    # Phase 1 - program generation
    size = [params['max_enc_len'], None]
    inputs['ques'] = tf.placeholder(tf.int32, size, 'ques')
    inputs['ques_len'] = tf.placeholder(tf.int32, [None], 'ques_len')
    inputs['prog_gt'] = tf.placeholder(tf.int32, [None, None], 'prog')
    # NOTE(review): caption is batch-major while 'ques' above is
    # time-major — assumed intentional, confirm against the feeders.
    size = [None, params['max_enc_len']]
    inputs['cap'] = tf.placeholder(tf.int32, size, 'caption')
    inputs['cap_len'] = tf.placeholder(tf.int32, [None], 'cap_len')
    inputs['cap_prog_gt'] = tf.placeholder(tf.int32, [None, None],
                                           'cap_prog_gt')
    # mask for pairwise program token loss
    inputs['prog_att_mask'] = tf.placeholder(tf.float32, [None, None, None],
                                             'mask')
    # for supervising placeholders
    if params['supervise_attention']:
        size = [params['max_dec_len'], params['max_enc_len'], None, 1]
        inputs['prog_att_gt'] = tf.placeholder(tf.float32, size, 'gt_att')
        inputs['cap_att_gt'] = tf.placeholder(tf.float32, size, 'cap_att')
        # masking out relevant parts for complete supervision
        inputs['ques_super_mask'] = tf.placeholder(tf.float32, size, 'q_mask')
        inputs['cap_super_mask'] = tf.placeholder(tf.float32, size, 'c_mask')
        inputs['supervise_switch'] = tf.placeholder(tf.bool, [],
                                                    'supervise_switch')
    # tie encoder and decoder (LSTM hidden and cell states)
    size = [params['num_layers'], None, params['lstm_size']]
    inputs['enc_dec_h'] = tf.placeholder(tf.float32, size, 'enc_dec_h')
    inputs['enc_dec_c'] = tf.placeholder(tf.float32, size, 'enc_dec_c')
    # Phase 2 - program execution
    size = [None, params['h_feat'], params['w_feat'], params['d_feat']]
    inputs['img_feat'] = tf.placeholder(tf.float32, size, 'img_feat')
    # Explicit names added for consistency with the other placeholders.
    inputs['prog_validity'] = tf.placeholder(tf.bool, [None],
                                             'prog_validity')
    # Phase 2.5 - caption execution
    inputs['align_gt'] = tf.placeholder(tf.int32, [None], 'align_cap')
    inputs['prog_validity_cap'] = tf.placeholder(tf.bool, [None],
                                                 'prog_validity_cap')
    # Phase 3 - answer generation
    inputs['ans_in'] = tf.placeholder(tf.int32, [None, None], 'ans_in')
    inputs['ans_out'] = tf.placeholder(tf.int32, [None, None], 'ans_out')
    inputs['ans'] = tf.placeholder(tf.int32, [None, None], 'ans')
    inputs['ans_len'] = tf.placeholder(tf.int32, [None], 'ans_len')
    # if discriminative, encode options
    # Configurable now; defaults to the previously hard-coded 100.
    num_options = params.get('num_options', 100)
    size = [None, params['max_enc_len'], num_options]
    inputs['opt'] = tf.placeholder(tf.int32, size, 'opt_out')
    inputs['opt_len'] = tf.placeholder(tf.int32, [None, num_options],
                                       'opt_len')
    inputs['gt_ind'] = tf.placeholder(tf.int32, [None], 'gt_ind')
    # history: one (question, answer) round pair per dialog round
    size = [None, params['num_rounds'], 2 * params['max_enc_len']]
    inputs['hist'] = tf.placeholder(tf.int32, size, 'history')
    size = [None, params['num_rounds']]
    inputs['hist_len'] = tf.placeholder(tf.int32, size, 'hist_len')
    # placeholders for fact (previous-round QA pair)
    size = [None, params['max_enc_len']]
    inputs['fact'] = tf.placeholder(tf.int32, size, 'fact')
    inputs['fact_len'] = tf.placeholder(tf.int32, [None], 'fact_len')
    # NOTE(review): this reads self.params while the rest of the method
    # uses the params argument — presumably the same dict; confirm.
    if not self.params['train_mode']:
        # additional placeholders during evaluation
        size = [None, params['lstm_size']]
        inputs['context'] = tf.placeholder(tf.float32, size, 'context')
        size = [1, 1, None, params['lstm_size']]
        inputs['cap_enc'] = tf.placeholder(tf.float32, size, 'cap_enc')
        size = [None, None, None, params['lstm_size']]
        inputs['ques_enc'] = tf.placeholder(tf.float32, size, 'ques_enc')
        size = [None, params['lstm_size']]
        inputs['hist_enc'] = tf.placeholder(tf.float32, size, 'hist_enc')
        size = [params['max_dec_len'], None, params['text_embed_size']]
        inputs['ques_attended'] = tf.placeholder(tf.float32, size, 'ques_att')
        inputs['cap_attended'] = tf.placeholder(tf.float32, size, 'cap_att')
    return inputs