in data_measurements/npmi/npmi.py [0:0]
def _prepare_identity_terms(self):
    """Return the identity terms that are frequent enough in the vocabulary.

    A term from ``self.identity_terms`` is kept when it appears in the index
    of ``self.vocab_counts_df`` and its value in the ``CNT`` column is at
    least ``self.min_count`` (the threshold is inclusive). The relative
    order of ``self.identity_terms`` is preserved in the result.

    Returns:
        list: The identity terms that pass both checks.
    """
    vocab_counts = self.vocab_counts_df
    min_count = self.min_count
    # Membership is tested first so the count lookup is only attempted for
    # terms that are actually present in the vocabulary index.
    avail_identity_terms = [
        term for term in self.identity_terms
        if term in vocab_counts.index
        and vocab_counts.loc[term, CNT] >= min_count
    ]
    # The comparison above is inclusive, so the message reports ">=".
    logs.debug("Identity terms that occur >= %s times are:" % min_count)
    logs.debug(avail_identity_terms)
    return avail_identity_terms