def __init__()

in data_measurements/npmi/npmi.py [0:0]


    def __init__(self, vocab_counts_df, tokenized_sentence_df, given_id_terms):
        """Store the inputs and eagerly compute association and bias results.

        Construction does real work: after saving its arguments it calls
        ``count_words_per_sentence``, ``calc_measures`` and ``calc_bias`` in
        that order and keeps each result on the instance.

        Args:
            vocab_counts_df: Table whose index supplies the vocabulary
                (presumably a pandas DataFrame of per-term counts; confirm
                against the caller).
            tokenized_sentence_df: Tokenized sentences, stored unchanged on
                the instance (presumably a pandas DataFrame; confirm).
            given_id_terms: Identity terms; passed to ``pair_terms`` to build
                the pairs whose scores are compared.
        """
        logs.debug("Initiating assoc class.")
        self.vocab_counts_df = vocab_counts_df
        # TODO: Change this logic so just the vocabulary is given.
        self.vocabulary = list(vocab_counts_df.index)
        # Single-column frame of zeros with one row per vocabulary entry.
        # It uses the default integer index, not the vocabulary terms.
        self.vocab_counts = pd.DataFrame([0] * len(self.vocabulary))
        logs.debug("vocabulary is")
        logs.debug(self.vocab_counts_df)
        self.tokenized_sentence_df = tokenized_sentence_df
        logs.debug("tokenized sentences are")
        logs.debug(self.tokenized_sentence_df)
        self.given_id_terms = given_id_terms
        logs.info("identity terms are")
        logs.info(self.given_id_terms)
        # Terms we calculate the difference between
        self.paired_terms = pair_terms(given_id_terms)

        # NOTE(review): the three calls below presumably read the attributes
        # assigned above, so keep them last and in this order.
        # Matrix of # sentences x vocabulary size
        self.word_cnts_per_sentence = self.count_words_per_sentence()
        logs.info("Calculating results...")
        # Formatted as {subgroup:{"count":{...},"npmi":{...}}}
        self.assoc_results_dict = self.calc_measures()
        # Dictionary keyed by pair tuples. Each value is a dataframe with
        # vocab terms as the index, and columns of paired difference and
        # individual scores for the two identity terms.
        self.bias_results_dict = self.calc_bias(self.assoc_results_dict)