in data_measurements/npmi/npmi.py [0:0]
def __init__(self, vocab_counts_df, tokenized_sentence_df, given_id_terms):
    """Store the inputs and eagerly compute association and bias results.

    Construction does all of the work: after the inputs are stored, the
    per-sentence word counts, the association measures and the paired bias
    results are computed in that order and kept as attributes.

    Args:
        vocab_counts_df: DataFrame whose index supplies the vocabulary.
            Its columns are not read here.
        tokenized_sentence_df: Stored unchanged on the instance.
            Presumably one row per tokenized sentence -- TODO confirm
            against count_words_per_sentence().
        given_id_terms: Identity terms handed to pair_terms() to build
            the pairs that are compared. Presumably an iterable of
            strings -- TODO confirm against the caller.
    """
    logs.debug("Initiating assoc class.")
    self.vocab_counts_df = vocab_counts_df
    # TODO: Change this logic so just the vocabulary is given.
    self.vocabulary = list(vocab_counts_df.index)
    # Single-column frame of zeros, one row per vocabulary entry. It uses
    # the default integer index, not the vocabulary terms themselves.
    self.vocab_counts = pd.DataFrame([0] * len(self.vocabulary))
    logs.debug("vocabulary is")
    logs.debug(self.vocab_counts_df)
    self.tokenized_sentence_df = tokenized_sentence_df
    logs.debug("tokenized sentences are")
    logs.debug(self.tokenized_sentence_df)
    self.given_id_terms = given_id_terms
    logs.info("identity terms are")
    logs.info(self.given_id_terms)
    # Terms we calculate the difference between
    self.paired_terms = pair_terms(given_id_terms)
    # NOTE(review): the three calls below presumably read the attributes
    # assigned above, so keep them last and in this order.
    # Matrix of # sentences x vocabulary size
    self.word_cnts_per_sentence = self.count_words_per_sentence()
    logs.info("Calculating results...")
    # Formatted as {subgroup:{"count":{...},"npmi":{...}}}
    self.assoc_results_dict = self.calc_measures()
    # Dictionary keyed by pair tuples. Each value is a dataframe with
    # vocab terms as the index, and columns of paired difference and
    # individual scores for the two identity terms.
    self.bias_results_dict = self.calc_bias(self.assoc_results_dict)