def __init__()

in learn_bpe.py [0:0]


    def __init__(self, vocab, probabilistic):
        """Build the pair statistics and the score-ordered heap for ``vocab``.

        Args:
            vocab: Iterable of ``(pos, word, freq)`` triples. It is iterated
                once to collect pair statistics and, when ``probabilistic``
                is truthy, a second time to count the initial units.
            probabilistic: When truthy, additionally track how often each
                unit has been produced (``self.produced_count``) and the
                total number of running symbols (``self.n_running_symbols``).
        """
        self.vocab = vocab
        self.probabilistic = probabilistic

        # pair -> occurrence count weighted by the frequency of each word.
        pair_totals = defaultdict(int)
        # pair -> set of vocab positions (``pos``) whose word contains it.
        self.vocab_entries_for_pair = defaultdict(set)
        for position, word, frequency in self.vocab:
            per_word_stats = self.get_pair_stats_from_word(word)
            for pair, occurrences in per_word_stats.items():
                pair_totals[pair] += occurrences * frequency
                self.vocab_entries_for_pair[pair].add(position)

        if self.probabilistic:
            # Probabilistic BPE needs the counts of the produced items; seed
            # them with the initial units of every word, weighted by the
            # word frequency.
            unit_counts = self.produced_count = defaultdict(int)
            running_total = 0
            for _, word, frequency in self.vocab:
                for unit in word:
                    unit_counts[unit] += frequency
                    running_total += frequency
            self.n_running_symbols = running_total

        # Heap entries are (count, pair) tuples; the pair (element 1) is the
        # key handed to the heap via ``key_function``.
        self.stats_heap = HeapWithInverseIndex(
            value_score_function=self._get_scoring_function(),
            use_score_caching=True,
            key_function=lambda entry: entry[1],
        )
        for pair, total in pair_totals.items():
            self.stats_heap.insert((total, pair))