in models.py [0:0]
def update_vocab(self, sentences, tokenize=True):
    """Extend ``self.word_vec`` with entries for words not already in it.

    The candidate words come from ``self.get_word_dict(sentences, tokenize)``.
    Every candidate that is already a key of ``self.word_vec`` is dropped,
    and the remaining ones are handed to ``self.get_w2v``; whatever that
    returns is merged into ``self.word_vec`` with ``update``. A one-line
    summary (total size, number of entries added) is printed in all cases.

    Args:
        sentences: forwarded unchanged to ``self.get_word_dict``.
        tokenize: forwarded unchanged to ``self.get_word_dict``.

    Raises:
        AssertionError: if ``self`` has no ``w2v_path`` attribute, or no
            ``word_vec`` attribute (i.e. the vocabulary was never built).

    NOTE(review): assumes ``get_word_dict`` returns a mutable mapping keyed
    by word and ``get_w2v`` returns a sized mapping -- confirm against
    their definitions.
    """
    # Raised explicitly instead of using ``assert`` statements so that the
    # checks still run under ``python -O``; the exception type and messages
    # are the same as before.
    if not hasattr(self, 'w2v_path'):
        raise AssertionError('warning : w2v path not set')
    if not hasattr(self, 'word_vec'):
        raise AssertionError('build_vocab before updating it')

    word_dict = self.get_word_dict(sentences, tokenize)

    # Keep only new words. Scan the candidates rather than the whole
    # existing vocabulary, and snapshot the matches first because a dict
    # must not be resized while it is being iterated.
    already_known = [word for word in word_dict if word in self.word_vec]
    for word in already_known:
        del word_dict[word]

    # Update the vocabulary (skipping the lookup when nothing is new).
    added = 0
    if word_dict:
        new_word_vec = self.get_w2v(word_dict)
        self.word_vec.update(new_word_vec)
        added = len(new_word_vec)
    print('New vocab size : %s (added %s words)'% (len(self.word_vec), added))