in training/models.py [0:0]
def __init__(self,
             question_vocab,
             num_output=4,
             question_wordvec_dim=64,
             question_hidden_dim=64,
             question_num_layers=2,
             question_dropout=0.5,
             planner_rnn_image_feat_dim=128,
             planner_rnn_action_embed_dim=32,
             planner_rnn_type='GRU',
             planner_rnn_hidden_dim=1024,
             planner_rnn_num_layers=1,
             planner_rnn_dropout=0,
             controller_fc_dims=(256, ),
             cnn_feat_flat_dim=32 * 10 * 10):
    """Build the planner/controller navigation model.

    Wires together four submodules:
      * ``cnn_fc_layer``    — projects flattened CNN features to
        ``planner_rnn_image_feat_dim``.
      * ``q_rnn``           — LSTM encoder for the question tokens.
      * ``ques_tr``         — small MLP transforming the question encoding.
      * ``planner_nav_rnn`` — recurrent planner over image/question/action
        inputs (returns its hidden states).
      * ``controller``      — MLP over the concatenation of image feature,
        action embedding, and planner hidden state.

    Args:
        question_vocab: dict-like; must provide 'questionTokenToIdx'.
        num_output: number of navigation actions the planner chooses among.
        question_wordvec_dim: word-embedding size for the question encoder.
        question_hidden_dim: hidden size of the question encoder (also the
            question embedding fed to the planner RNN).
        question_num_layers: number of layers in the question encoder.
        question_dropout: dropout inside the question encoder.
        planner_rnn_image_feat_dim: projected image-feature size.
        planner_rnn_action_embed_dim: action-embedding size.
        planner_rnn_type: recurrent cell type for the planner (e.g. 'GRU').
        planner_rnn_hidden_dim: planner RNN hidden size.
        planner_rnn_num_layers: planner RNN layer count.
        planner_rnn_dropout: planner RNN dropout.
        controller_fc_dims: hidden-layer sizes of the controller MLP.
        cnn_feat_flat_dim: flattened size of the incoming CNN feature map.
            Defaults to 32 * 10 * 10 (presumably 32 channels over a 10x10
            spatial map — confirm against the CNN producing these features).
    """
    super(NavPlannerControllerModel, self).__init__()

    # Project flattened CNN features into the planner's image-feature space.
    self.cnn_fc_layer = nn.Sequential(
        nn.Linear(cnn_feat_flat_dim, planner_rnn_image_feat_dim),
        nn.ReLU(),
        nn.Dropout(p=0.5))

    # Question encoder over token indices.
    self.q_rnn = QuestionLstmEncoder(
        token_to_idx=question_vocab['questionTokenToIdx'],
        wordvec_dim=question_wordvec_dim,
        rnn_dim=question_hidden_dim,
        rnn_num_layers=question_num_layers,
        rnn_dropout=question_dropout)

    # Non-linear transform of the question encoding before the planner.
    self.ques_tr = nn.Sequential(
        nn.Linear(question_hidden_dim, question_hidden_dim),
        nn.ReLU(),
        nn.Dropout(p=0.5))

    # Planner RNN consumes image features, the question embedding, and the
    # previous action; return_states=True exposes hidden states so the
    # controller below can condition on them.
    self.planner_nav_rnn = NavRnn(
        image_input=True,
        image_feat_dim=planner_rnn_image_feat_dim,
        question_input=True,
        question_embed_dim=question_hidden_dim,
        action_input=True,
        action_embed_dim=planner_rnn_action_embed_dim,
        num_actions=num_output,
        rnn_type=planner_rnn_type,
        rnn_hidden_dim=planner_rnn_hidden_dim,
        rnn_num_layers=planner_rnn_num_layers,
        rnn_dropout=planner_rnn_dropout,
        return_states=True)

    # Controller MLP over [image feat | action embed | planner hidden].
    # output_dim=2 (a 2-way decision) with add_sigmoid=0, so it emits raw
    # scores — presumably consumed by a softmax/CE loss; verify in training.
    self.controller = build_mlp(
        input_dim=(planner_rnn_image_feat_dim +
                   planner_rnn_action_embed_dim + planner_rnn_hidden_dim),
        hidden_dims=controller_fc_dims,
        output_dim=2,
        add_sigmoid=0)