in access/feature_extraction.py [0:0]
def get_word2rank(vocab_size=np.inf):
    """Map each word of the FastText embeddings file to its line rank.

    The file at ``FASTTEXT_EMBEDDINGS_PATH`` is read line by line. The first
    line is treated as a header and skipped; every following line contributes
    its first space-separated token as a word, keyed to the zero-based index
    of that line (counted after the header).

    Args:
        vocab_size: Maximum number of lines (after the header) to read.
            The default, ``np.inf``, reads the whole file.

    Returns:
        dict: word -> zero-based rank. If the same token appears on several
        lines, the rank of the last such line is kept.
    """
    # NOTE(review): presumably makes sure the embeddings file is available
    # before it is read - confirm against prepare_fasttext_embeddings.
    prepare_fasttext_embeddings()
    # TODO: Decrease vocab size or load from smaller file
    embedding_lines = yield_lines(FASTTEXT_EMBEDDINGS_PATH)
    next(embedding_lines)  # Discard the header line
    ranks = {}
    # Count lines from 1 so the limit check compares "lines consumed so far"
    # directly against vocab_size.
    for position, row in enumerate(embedding_lines, start=1):
        if position > vocab_size:
            break
        token = row.split(' ')[0]
        ranks[token] = position - 1
    return ranks