in question_generation_model.py [0:0]
def diverse_beam_search(self, image_input, model, beam_size, num_groups, keyword=None):
    """
    This function performs diverse beam search borrowing idea from https://arxiv.org/abs/1610.02424
    :param image_input: Image encoded feature
    :param model: Model definition file
    :param beam_size: Beam size to be used for decoding
    :param num_groups: Number of clusters
    :param keyword: Keyword input feature to be used with some model architectures
    :return: ([final_candidate], candidates) — the single highest-scoring decoded
             question (may be '' if no hypothesis scored above 0), plus a dict
             mapping each question whose score exceeded the threshold to its score.
    """
    # Ignore PAD, START, END tokens
    start = [self.word_to_idx["<START>"]]
    # FIX: was 'glov' — made consistent with the 'glove' substring checked in the
    # prediction branches below; the sequence length comes from the model's input
    # layer for GloVe models, otherwise from the dataset-wide maximum.
    if 'glove' in self.datasets.embedding_file:
        max_seq_len = model.inputs[1].shape[1].value
    else:
        max_seq_len = self.datasets.max_question_len
    # Duplicate the image feature to a batch of 2 — presumably to match the
    # doubled text inputs built for the elmo/bert branches below; TODO confirm
    # against the model definitions.
    image_input = np.repeat(image_input, axis=0, repeats=2)
    self.logger.info('max len %s', max_seq_len)
    # Each live hypothesis is a [token_id_list, cumulative_score] pair.
    start_word = [[start, 0.0]]
    EOS_utterances = []             # finished hypotheses (reached <END>)
    map_EOS_utterances = dict()     # de-duplicates finished hypotheses by text
    while len(start_word[0][0]) < max_seq_len:
        temp = []
        self.logger.debug('\n\n\nCurrent it: %s Max seq len: %s' %(len(start_word[0][0]), max_seq_len))
        for s in start_word:
            # Build the model input appropriate to the embedding in use and
            # get next-token scores for this partial hypothesis.
            if self.datasets.use_keyword:
                sequence = pad_sequences([s[0]], maxlen=max_seq_len, padding='post')
                preds = model.predict([image_input, sequence, keyword])
            elif 'glove' in self.datasets.embedding_file:
                sequence = pad_sequences([s[0]], maxlen=max_seq_len, padding='post')
                preds = model.predict([image_input, sequence])
            elif 'elmo' in self.datasets.embedding_file:
                sequence = ' '.join([self.datasets.idx_to_word[idx] for idx in s[0]])
                sequence = self.cleanText(sequence)
                # Doubled to match the batch-of-2 image input above.
                sequence = np.array([sequence, sequence])
                preds = model.predict([image_input, sequence])
            elif 'bert' in self.datasets.embedding_file:
                # s[0][1:] drops the leading <START> token before detokenizing.
                sequence = ' '.join([self.datasets.idx_to_word[idx] for idx in s[0][1:]])
                sequence = self.cleanText(sequence)
                sequence = [[sequence], [sequence]]
                input_ids, input_masks, segment_ids, _ = preprocess_bert_input(sequence, [None] * len(sequence),
                                                                               self.datasets.max_question_len,
                                                                               self.tokenizer, self.vocab_size)
                preds = model.predict([image_input, input_ids, input_masks, segment_ids])
            else:
                # Unknown embedding configuration — unrecoverable.
                exit(-1)
            # Keep the top 2*beam_size next tokens; the dissimilarity grouping
            # below prunes this widened pool back down to beam_size.
            word_preds = np.argsort(preds[0])[- 2*beam_size:]
            # Getting the top <beam_size>(n) predictions and creating a
            # new list so as to put them via the model again
            for w in word_preds:
                next_quest, prob = s[0][:], s[1]
                next_quest.append(w)
                # If END token is found then keep the utterance
                # (2 is assumed to be the <END> token index — see the
                # PAD/START/END note at the top; TODO confirm against vocab).
                if w == 2:
                    intermediate_question = ' '.join([self.datasets.idx_to_word[i] for i in next_quest])
                    if intermediate_question not in map_EOS_utterances:
                        map_EOS_utterances[intermediate_question] = prob
                        EOS_utterances.append([next_quest, prob])
                # NOTE(review): the finished hypothesis above is stored with the
                # score BEFORE adding the END token's score — looks intentional
                # but worth confirming.
                prob += preds[0][w]
                temp.append([next_quest, prob])
        start_word = temp
        # Sorting according to the probabilities (ascending; best are last)
        start_word = sorted(start_word, reverse=False, key=lambda l: l[1])
        # Getting the top words
        if len(start_word[0][0]) > 2:
            # Past the first step: take a widened pool and enforce diversity.
            start_word = start_word[-2 * beam_size:]
            start_word = self.dissimilarity_grouping(start_word, num_groups, beam_size)
        else:
            start_word = start_word[-beam_size:]
    candidates = dict()
    max_prob = 0
    final_candidate = ''
    # Consider both still-live beams and hypotheses that ended early with <END>.
    start_word += EOS_utterances
    unique_questions_not_seen_training_data = set()
    thresh = 2.0
    for st_wd in start_word:
        prob = st_wd[1]
        st_wd = st_wd[0]
        intermediate_question = [self.datasets.idx_to_word[i] for i in st_wd]
        # Truncate at the first <END> token, then drop the leading <START>.
        final_question = []
        for i in intermediate_question:
            if i != '<END>':
                final_question.append(i)
            else:
                break
        final_question = ' '.join(final_question[1:])
        # Track the single best-scoring question overall.
        if prob > max_prob:
            max_prob = prob
            final_candidate = final_question
        # Keep every question above the score threshold (best score per text).
        if prob > thresh:
            if final_question in candidates:
                if prob > candidates[final_question]:
                    candidates[final_question] = prob
            else:
                candidates[final_question] = prob
            if final_question not in self.datasets.unique_train_questions:
                unique_questions_not_seen_training_data.add(final_question)
    self.logger.info(
        'Unique generated questions not seen in training data: %s' % unique_questions_not_seen_training_data)
    self.datasets.unique_generated_questions.update(candidates.keys())
    self.logger.info('Final DBS candidates: %s' % candidates)
    self.datasets.generated_questions += list(candidates.keys())
    # Inventiness:
    # Number of unique questions not seen in training data / Total number of generated questions for that image
    # FIX: guard against ZeroDivisionError when no candidate cleared the threshold.
    if candidates:
        self.logger.info('Inventiveness: %s' % str(len(unique_questions_not_seen_training_data) / len(candidates)))
    else:
        self.logger.info('Inventiveness: N/A (no candidates above threshold)')
    return [final_candidate], candidates