def adapt_num_symbols()

in learn_bpe.py [0:0]


def adapt_num_symbols(num_symbols, vocab, total_symbols):
    """
    Handle the parameter --total_symbols.

    When ``total_symbols`` is falsy, ``num_symbols`` is returned unchanged
    and ``vocab`` is not inspected.

    Otherwise every entry of ``vocab`` is unpacked as a 3-item record whose
    middle item is the word (a sequence of symbols); the first and third
    items are ignored. The distinct symbols found in non-final positions and
    the distinct symbols found in final position are counted separately,
    both counts are reported on stderr, and their sum is subtracted from
    ``num_symbols``.

    Returns the (possibly reduced) number of merge operations.
    """
    if not total_symbols:
        return num_symbols

    internal_chars = set()
    final_chars = set()
    for entry in vocab:
        _, symbols, _ = entry
        # Everything except the last symbol is word-internal; the last
        # symbol is tracked in its own set.
        internal_chars.update(symbols[:-1])
        final_chars.add(symbols[-1])

    n_internal = len(internal_chars)
    n_final = len(final_chars)
    reduction = n_internal + n_final

    sys.stderr.write('Number of word-internal characters: {0}\n'.format(n_internal))
    sys.stderr.write('Number of word-final characters: {0}\n'.format(n_final))
    sys.stderr.write('Reducing number of merge operations by {0}\n'.format(reduction))

    return num_symbols - reduction