def _compute()

in npmi/npmi.py [0:0]


    def _compute(self, references, vocab_counts, subgroup):
        """Compute co-occurrence, PMI and nPMI statistics for ``subgroup``.

        Args:
            references: Stored on ``self.references`` for the helper methods
                called below (presumably the sentences to analyse -- confirm
                against ``calc_cooccurrences``).
            vocab_counts: Either a ``dict`` whose keys become the vocabulary
                index and whose values fill the ``CNT`` column, or a
                ``pd.DataFrame`` indexed by vocabulary entry that already
                has a ``CNT`` column. A DataFrame argument is copied, so the
                caller's object is never modified.
            subgroup: Label that must be present in the vocabulary index; it
                is forwarded to every helper.

        Returns:
            A dict with the keys ``"bias"``, ``"co-occurrences"``, ``"pmi"``
            and ``"npmi"``, or ``None`` (after printing a message) when
            ``vocab_counts`` is neither a dict nor a DataFrame.

        Raises:
            KeyError: If ``subgroup`` is not in the vocabulary index.
        """
        if isinstance(vocab_counts, dict):
            vocab_counts_df = pd.DataFrame.from_dict(vocab_counts,
                                                     orient='index',
                                                     columns=[CNT])
        elif isinstance(vocab_counts, pd.DataFrame):
            # Work on a copy: the PROP column added below must not leak into
            # the DataFrame owned by the caller.
            vocab_counts_df = vocab_counts.copy()
        else:
            print("Can't support the data structure for the vocab counts. =(")
            return None
        # These are used throughout the rest of the functions
        self.references = references
        self.vocab_counts_df = vocab_counts_df
        # Share of the total count held by each vocabulary entry.
        counts = vocab_counts_df[CNT]
        vocab_counts_df[PROP] = counts / counts.sum()
        # self.mlb_list holds num batches x num_sentences
        self.mlb_list = []
        # Index of the subgroup word in the sparse vector
        subgroup_idx = vocab_counts_df.index.get_loc(subgroup)
        print("Calculating co-occurrences...")
        df_coo = self.calc_cooccurrences(subgroup, subgroup_idx)
        vocab_cooc_df = self.set_idx_cols(df_coo, subgroup)
        print("Calculating PMI...")
        pmi_df = self.calc_PMI(vocab_cooc_df, subgroup)
        print("Calculating nPMI...")
        npmi_df = self.calc_nPMI(pmi_df, vocab_cooc_df, subgroup)
        # Column-wise spread: largest nPMI plus the magnitude of the smallest.
        npmi_bias = npmi_df.max(axis=0) + abs(npmi_df.min(axis=0))
        return {"bias": npmi_bias, "co-occurrences": vocab_cooc_df,
                "pmi": pmi_df, "npmi": npmi_df}