Path Lines of Code bindings/node/Cargo.toml 18 bindings/node/build.rs 4 bindings/node/index.js 376 bindings/node/rustfmt.toml 1 bindings/node/src/arc_rwlock_serde.rs 15 bindings/node/src/decoders.rs 161 bindings/node/src/encoding.rs 310 bindings/node/src/lib.rs 13 bindings/node/src/models.rs 303 bindings/node/src/normalizers.rs 159 bindings/node/src/pre_tokenizers.rs 231 bindings/node/src/processors.rs 110 bindings/node/src/tasks/mod.rs 2 bindings/node/src/tasks/models.rs 67 bindings/node/src/tasks/tokenizer.rs 111 bindings/node/src/tokenizer.rs 463 bindings/node/src/trainers.rs 59 bindings/node/src/utils.rs 42 bindings/node/types.ts 6 bindings/python/Cargo.toml 26 bindings/python/MANIFEST.in 7 bindings/python/conftest.py 12 bindings/python/py_src/tokenizers/__init__.py 52 bindings/python/py_src/tokenizers/__init__.pyi 221 bindings/python/py_src/tokenizers/decoders/__init__.py 13 bindings/python/py_src/tokenizers/decoders/__init__.pyi 56 bindings/python/py_src/tokenizers/implementations/__init__.py 6 bindings/python/py_src/tokenizers/implementations/base_tokenizer.py 131 bindings/python/py_src/tokenizers/implementations/bert_wordpiece.py 131 bindings/python/py_src/tokenizers/implementations/byte_level_bpe.py 97 bindings/python/py_src/tokenizers/implementations/char_level_bpe.py 113 bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py 83 bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py 120 bindings/python/py_src/tokenizers/models/__init__.py 6 bindings/python/py_src/tokenizers/models/__init__.pyi 93 bindings/python/py_src/tokenizers/normalizers/__init__.py 23 bindings/python/py_src/tokenizers/normalizers/__init__.pyi 101 bindings/python/py_src/tokenizers/pre_tokenizers/__init__.py 14 bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi 90 bindings/python/py_src/tokenizers/processors/__init__.py 7 bindings/python/py_src/tokenizers/processors/__init__.pyi 40 bindings/python/py_src/tokenizers/tools/__init__.py 1 bindings/python/py_src/tokenizers/tools/visualizer-styles.css 138 bindings/python/py_src/tokenizers/tools/visualizer.py 209 bindings/python/py_src/tokenizers/trainers/__init__.py 6 bindings/python/py_src/tokenizers/trainers/__init__.pyi 28 bindings/python/pyproject.toml 61 bindings/python/scripts/convert.py 342 bindings/python/scripts/sentencepiece_extractor.py 93 bindings/python/scripts/spm_parity_check.py 208 bindings/python/setup.cfg 51 bindings/python/src/decoders.rs 577 bindings/python/src/encoding.rs 244 bindings/python/src/error.rs 35 bindings/python/src/lib.rs 39 bindings/python/src/models.rs 785 bindings/python/src/normalizers.rs 760 bindings/python/src/pre_tokenizers.rs 791 bindings/python/src/processors.rs 642 bindings/python/src/token.rs 40 bindings/python/src/tokenizer.rs 1060 bindings/python/src/trainers.rs 836 bindings/python/src/utils/iterators.rs 90 bindings/python/src/utils/mod.rs 62 bindings/python/src/utils/normalization.rs 477 bindings/python/src/utils/pretokenization.rs 254 bindings/python/src/utils/regex.rs 20 bindings/python/src/utils/serde_pyo3.rs 454 bindings/python/stub.py 141 tokenizers/Cargo.toml 82 tokenizers/benches/bert_benchmark.rs 132 tokenizers/benches/bpe_benchmark.rs 115 tokenizers/benches/common/mod.rs 76 tokenizers/benches/layout_benchmark.rs 64 tokenizers/benches/llama3_benchmark.rs 64 tokenizers/benches/unigram_benchmark.rs 37 tokenizers/src/decoders/bpe.rs 31 tokenizers/src/decoders/byte_fallback.rs 91 tokenizers/src/decoders/fuse.rs 34 tokenizers/src/decoders/mod.rs 39 tokenizers/src/decoders/sequence.rs 48 tokenizers/src/decoders/strip.rs 67 tokenizers/src/decoders/wordpiece.rs 74 tokenizers/src/lib.rs 20 tokenizers/src/models/bpe/mod.rs 59 tokenizers/src/models/bpe/model.rs 834 tokenizers/src/models/bpe/serialization.rs 217 tokenizers/src/models/bpe/trainer.rs 598 tokenizers/src/models/bpe/word.rs 277 tokenizers/src/models/mod.rs 59 tokenizers/src/models/unigram/lattice.rs 588 tokenizers/src/models/unigram/mod.rs 8 tokenizers/src/models/unigram/model.rs 509 tokenizers/src/models/unigram/serialization.rs 98 tokenizers/src/models/unigram/trainer.rs 685 tokenizers/src/models/unigram/trie.rs 43 tokenizers/src/models/wordlevel/mod.rs 204 tokenizers/src/models/wordlevel/serialization.rs 114 tokenizers/src/models/wordlevel/trainer.rs 151 tokenizers/src/models/wordpiece/mod.rs 250 tokenizers/src/models/wordpiece/serialization.rs 133 tokenizers/src/models/wordpiece/trainer.rs 147 tokenizers/src/normalizers/bert.rs 102 tokenizers/src/normalizers/byte_level.rs 164 tokenizers/src/normalizers/mod.rs 40 tokenizers/src/normalizers/precompiled.rs 67 tokenizers/src/normalizers/prepend.rs 56 tokenizers/src/normalizers/replace.rs 131 tokenizers/src/normalizers/strip.rs 133 tokenizers/src/normalizers/unicode.rs 91 tokenizers/src/normalizers/utils.rs 48 tokenizers/src/pre_tokenizers/bert.rs 76 tokenizers/src/pre_tokenizers/byte_level.rs 513 tokenizers/src/pre_tokenizers/delimiter.rs 21 tokenizers/src/pre_tokenizers/digits.rs 93 tokenizers/src/pre_tokenizers/fixed_length.rs 111 tokenizers/src/pre_tokenizers/metaspace.rs 331 tokenizers/src/pre_tokenizers/mod.rs 258 tokenizers/src/pre_tokenizers/punctuation.rs 72 tokenizers/src/pre_tokenizers/sequence.rs 73 tokenizers/src/pre_tokenizers/split.rs 33 tokenizers/src/pre_tokenizers/unicode_scripts/mod.rs 3 tokenizers/src/pre_tokenizers/unicode_scripts/pre_tokenizer.rs 132 tokenizers/src/pre_tokenizers/whitespace.rs 94 tokenizers/src/processors/bert.rs 268 tokenizers/src/processors/mod.rs 112 tokenizers/src/processors/roberta.rs 309 tokenizers/src/processors/sequence.rs 156 tokenizers/src/processors/template.rs 854 tokenizers/src/tokenizer/added_vocabulary.rs 799 tokenizers/src/tokenizer/encoding.rs 780 tokenizers/src/tokenizer/mod.rs 1185 tokenizers/src/tokenizer/normalizer.rs 2002 tokenizers/src/tokenizer/pattern.rs 187 tokenizers/src/tokenizer/pre_tokenizer.rs 228 tokenizers/src/tokenizer/serialization.rs 215 tokenizers/src/utils/cache.rs 85 tokenizers/src/utils/fancy.rs 55 tokenizers/src/utils/from_pretrained.rs 58 tokenizers/src/utils/iter.rs 71 tokenizers/src/utils/mod.rs 112 tokenizers/src/utils/onig.rs 15 tokenizers/src/utils/padding.rs 123 tokenizers/src/utils/parallelism.rs 219 tokenizers/src/utils/progress.rs 12 tokenizers/src/utils/truncation.rs 279