in src/utils.py [0:0]
def bow_idf(sentences, word_vec, idf_dict=None):
    """
    Get sentence representations using weighted IDF bag-of-words.

    Each sentence is embedded as the weighted average of the vectors of its
    *distinct* usable words (a word repeated inside a sentence counts once).
    A word is usable when it is a key of ``word_vec`` and, if ``idf_dict`` is
    given, also a key of ``idf_dict``.

    Args:
        sentences: Iterable of tokenized sentences, each an iterable of
            hashable tokens.
        word_vec: Mapping from token to its vector. Vectors are multiplied
            by scalar weights, so they are expected to be numpy arrays.
        idf_dict: Optional mapping from token to its IDF weight. When
            ``None``, every in-vocabulary word gets weight 1.0, i.e. the
            result is the plain average of the word vectors.

    Returns:
        The per-sentence embeddings stacked with ``np.vstack`` (one row per
        sentence for 1-D word vectors).

    Raises:
        IndexError: If a sentence has no usable word and ``word_vec`` is
            empty, so no placeholder vector is available.

    Note:
        A sentence without any usable word is represented by the vector of
        the first entry of ``word_vec`` as a placeholder.
    """
    embeddings = []
    fallback = None  # placeholder vector, resolved lazily and only once

    for sent in sentences:
        # De-duplicate like set() would, but keep first-seen order so the
        # floating-point summation order is reproducible across runs
        # (string hashing is randomized per process).
        unique_words = dict.fromkeys(sent)

        if idf_dict is None:
            words = [w for w in unique_words if w in word_vec]
            weights = [1.0] * len(words)
        else:
            words = [
                w for w in unique_words if w in word_vec and w in idf_dict
            ]
            weights = [idf_dict[w] for w in words]

        if words:
            weighted_sum = np.sum(
                [word_vec[w] * wt for w, wt in zip(words, weights)],
                axis=0,
            )
            embeddings.append(weighted_sum / np.sum(weights))
            continue

        if fallback is None:
            # next(iter(...)) avoids copying the whole vocabulary key list
            # just to read its first element.
            try:
                fallback = word_vec[next(iter(word_vec))]
            except StopIteration:
                raise IndexError(
                    "word_vec is empty: no fallback vector available"
                ) from None
        embeddings.append(fallback)

    return np.vstack(embeddings)