in torchmoji/create_vocab.py [0:0]
def extend_vocab(current_vocab, new_vocab, max_tokens=10000):
""" Extends current vocabulary with words from vocab that are not
present in the current vocabulary. Adds up to max_tokens words.
# Arguments:
current_vocab: Current dictionary of tokens.
new_vocab: Vocabulary to be added. MUST have word_counts populated, i.e.
must have run count_all_words() previously.
max_tokens: Maximum number of words to be added.
# Returns:
How many new tokens have been added.
"""
if max_tokens < 0:
max_tokens = 10000
words = OrderedDict()
# sort words by frequency
desc_order = OrderedDict(sorted(new_vocab.word_counts.items(),
key=lambda kv: kv[1], reverse=True))
words.update(desc_order)
base_index = len(current_vocab.keys())
added = 0
for word in words:
if added >= max_tokens:
break
if word not in current_vocab.keys():
current_vocab[word] = base_index + added
added += 1
return added