def __init__()

in data_measurements/npmi/npmi.py [0:0]


    def __init__(
        self,
        dstats,
        identity_terms,
        load_only=False,
        use_cache=False,
        save=True
    ):
        """Initialise the nPMI state from dataset statistics and a term list.

        Nothing is computed here: the constructor only copies settings and
        inputs onto the instance and creates empty result containers.

        Args:
            dstats: The data measurements tool settings (dataset, config,
                etc.). Its ``tokenized_df``, ``vocab_counts_df``,
                ``min_vocab_count`` and ``dataset_cache_dir`` attributes
                are read.
            identity_terms: The full list of terms to consider; stored
                unchanged and written to the log.
            load_only: Flag stored as ``self.load_only``.
                NOTE(review): the previous comment tied this to whether
                caching can be used ("when live, no") -- confirm the
                intended meaning against the callers.
            use_cache: Whether to first try the cache before calculating.
            save: Whether to save results.
        """
        # --- Settings supplied by the caller ---
        self.dstats = dstats
        self.load_only = load_only
        self.use_cache = use_cache
        self.save = save

        # --- Data pulled from dstats ---
        # Only the tokenized column of the tokenized dataset is retained.
        self.tokenized_sentence_df = dstats.tokenized_df[TOKENIZED_FIELD]
        # Vocabulary counts: dataframe of shape #vocab x 1 (count).
        self.vocab_counts_df = dstats.vocab_counts_df
        # Minimum number of occurrences required for inclusion.
        self.min_count = dstats.min_vocab_count

        # --- Cache locations ---
        cache_dir = pjoin(dstats.dataset_cache_dir, SING)
        self.cache_path = cache_dir
        self.avail_terms_json_fid = pjoin(cache_dir, "identity_terms.json")

        # --- Term lists ---
        # TODO: Users ideally can type in whatever words they want.
        # For now this is the complete list of terms.
        self.identity_terms = identity_terms
        logs.info("Using term list:")
        logs.info(self.identity_terms)
        # Subset of identity_terms occurring more than MIN_VOCAB_COUNT
        # times; starts out empty.
        self.avail_identity_terms = []
        # TODO: Let users specify
        self.open_class_only = True

        # --- Result containers (all start empty) ---
        # Associations for single words.
        self.assoc_results_dict = defaultdict(dict)
        # Association bias for paired terms.
        self.bias_results_dict = defaultdict(dict)
        # Dataframes backing the displays.
        self.bias_dfs_dict = defaultdict(dict)
        # Single-word associations together with their paired bias values.
        # Intended layout:
        # {(s1, s2): {pd.DataFrame({s1-s2: diffs, s1: assoc, s2: assoc})}}
        self.results_dict = defaultdict(lambda: defaultdict(dict))
        # Cache filenames, derived from the results.
        self.filenames_dict = defaultdict(dict)