in question_generation_model.py [0:0]
def load_embeddings(self):
    """
    Load GloVe vectors and build the embedding matrix for the vocabulary.

    Reads the whitespace-separated GloVe file named by
    ``self.datasets.embedding_file`` (resolved against the current working
    directory), then maps every word in ``self.word_to_idx`` to its
    pretrained vector.

    :return: numpy array of shape ``(self.vocab_size, self.embedding_dim)``.
        Rows for words missing from the GloVe file remain all-zero; the
        special ``<START>``/``<END>`` tokens get random vectors since they
        have no pretrained embedding.
    """
    glove_dir = os.getcwd()  # '/Volumes/Data/data/glove.6B'
    embeddings_index = {}  # word -> float32 vector
    # Context manager guarantees the file is closed even if parsing raises.
    with open(os.path.join(glove_dir, self.datasets.embedding_file),
              encoding="utf-8") as f:
        for line in f:
            values = line.split()
            if not values:
                continue  # skip blank lines defensively
            word = values[0]
            embeddings_index[word] = np.asarray(values[1:], dtype='float32')
    self.logger.info("Embedding_index: %s" % str(len(embeddings_index)))
    # Dense vector for each word in our vocabulary. np.zeros already makes
    # every row zero, so out-of-vocabulary words need no explicit assignment.
    embedding_matrix = np.zeros((self.vocab_size, self.embedding_dim))
    for word, i in self.word_to_idx.items():
        embedding_vector = embeddings_index.get(word)
        if word == '<START>' or word == '<END>':
            # Special tokens have no pretrained vector; initialize randomly.
            embedding_vector = np.random.rand(self.embedding_dim)
        if embedding_vector is not None and len(embedding_vector) > 0:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix