Distribution of file sizes (measured in lines of code); the files below are listed from largest to smallest.
File | # lines | # units |
---|---|---|
fast_wordpiece_tokenizer_model_builder.cc in tensorflow_text/core/kernels | 617 | 22 |
sentencepiece_kernels.cc in tensorflow_text/core/kernels | 610 | 21 |
ragged_tensor_to_tensor_tflite.cc in tensorflow_text/core/kernels | 565 | 20 |
sentence_fragmenter_v2.cc in tensorflow_text/core/kernels | 522 | 21 |
fast_wordpiece_tokenizer.cc in tensorflow_text/core/kernels | 439 | 10 |
constrained_sequence.cc in tensorflow_text/core/kernels | 329 | 8 |
normalize_kernels.cc in tensorflow_text/core/kernels | 310 | 12 |
mst_solver.h in tensorflow_text/core/kernels | 285 | 16 |
sentence_fragmenter.cc in tensorflow_text/core/kernels | 278 | 17 |
fast_wordpiece_tokenizer_kernel_template.h in tensorflow_text/core/kernels | 271 | 12 |
utils.py in tensorflow_text/tools/wordpiece_vocab | 249 | 33 |
pointer_ops.py in tensorflow_text/python/ops | 247 | 8 |
wordpiece_kernel.cc in tensorflow_text/core/kernels | 244 | 12 |
wordpiece_tokenizer_learner_lib.py in tensorflow_text/tools/wordpiece_vocab | 214 | 12 |
wordpiece_tokenizer.cc in tensorflow_text/core/kernels | 203 | 8 |
ngrams_kernel_template.h in tensorflow_text/core/kernels | 197 | 7 |
sentence_breaking_kernels.cc in tensorflow_text/core/kernels | 196 | 5 |
constrained_sequence_kernel.cc in tensorflow_text/core/kernels | 189 | 5 |
wordshape_ops.py in tensorflow_text/python/ops | 188 | 6 |
sentencepiece_tokenizer.py in tensorflow_text/python/ops | 185 | 9 |
sentence_breaking_utils.cc in tensorflow_text/core/kernels | 183 | 7 |
benchmark_utils.py in tensorflow_text/python/benchmarks | 182 | 9 |
item_selector_ops.py in tensorflow_text/python/ops | 179 | 15 |
tokenizer_from_logits_kernel.cc in tensorflow_text/core/kernels | 174 | 6 |
tokenizers_benchmarks.py in tensorflow_text/python/benchmarks | 166 | 12 |
split_merge_tokenize_kernel.cc in tensorflow_text/core/kernels | 164 | 6 |
rouge_l_kernel.cc in tensorflow_text/core/kernels | 160 | 6 |
generate_vocab.py in tensorflow_text/tools/wordpiece_vocab | 160 | 2 |
wordpiece_tokenizer.py in tensorflow_text/python/ops | 160 | 6 |
whitespace_tokenizer_kernel_template.h in tensorflow_text/core/kernels | 159 | 6 |
tokenization_layers.py in tensorflow_text/python/keras/layers | 153 | 14 |
regex_split_kernels.cc in tensorflow_text/core/kernels | 148 | 5 |
sentencepiece_ops.cc in tensorflow_text/core/ops | 145 | - |
ops_benchmarks.py in tensorflow_text/python/benchmarks | 139 | 16 |
mst_op_kernels.cc in tensorflow_text/core/kernels | 138 | 3 |
trimmer_ops.py in tensorflow_text/python/ops | 138 | 11 |
bert_tokenizer.py in tensorflow_text/python/ops | 128 | 10 |
tftext.bzl in tensorflow_text | 128 | - |
unicode_script_tokenize_kernel.cc in tensorflow_text/core/kernels | 125 | 2 |
masking_ops.py in tensorflow_text/python/ops | 117 | 9 |
fast_wordpiece_tokenizer.py in tensorflow_text/python/ops | 107 | 4 |
sentence_fragmenter.h in tensorflow_text/core/kernels | 101 | 5 |
whitespace_tokenize_kernel.cc in tensorflow_text/core/kernels | 101 | 2 |
tokenizer_from_logits_op.cc in tensorflow_text/core/ops | 98 | 1 |
disjoint_set_forest.h in tensorflow_text/core/kernels | 98 | 6 |
fast_wordpiece_tokenizer.h in tensorflow_text/core/kernels | 92 | - |
unicode_script_tokenizer.py in tensorflow_text/python/ops | 92 | 5 |
wordpiece_op.cc in tensorflow_text/core/ops | 90 | 1 |
darts_clone_trie_wrapper.h in tensorflow_text/core/kernels | 88 | 13 |
viterbi_decode.py in tensorflow_text/python/numpy | 88 | 3 |

Files ranked by number of units (most units first):

File | # lines | # units |
---|---|---|
utils.py in tensorflow_text/tools/wordpiece_vocab | 249 | 33 |
fast_wordpiece_tokenizer_model_builder.cc in tensorflow_text/core/kernels | 617 | 22 |
sentence_fragmenter_v2.cc in tensorflow_text/core/kernels | 522 | 21 |
sentencepiece_kernels.cc in tensorflow_text/core/kernels | 610 | 21 |
ragged_tensor_to_tensor_tflite.cc in tensorflow_text/core/kernels | 565 | 20 |
sentence_fragmenter.cc in tensorflow_text/core/kernels | 278 | 17 |
mst_solver.h in tensorflow_text/core/kernels | 285 | 16 |
ops_benchmarks.py in tensorflow_text/python/benchmarks | 139 | 16 |
item_selector_ops.py in tensorflow_text/python/ops | 179 | 15 |
tokenization_layers.py in tensorflow_text/python/keras/layers | 153 | 14 |
darts_clone_trie_wrapper.h in tensorflow_text/core/kernels | 88 | 13 |
normalize_kernels.cc in tensorflow_text/core/kernels | 310 | 12 |
fast_wordpiece_tokenizer_kernel_template.h in tensorflow_text/core/kernels | 271 | 12 |
wordpiece_kernel.cc in tensorflow_text/core/kernels | 244 | 12 |
wordpiece_tokenizer_learner_lib.py in tensorflow_text/tools/wordpiece_vocab | 214 | 12 |
tokenizers_benchmarks.py in tensorflow_text/python/benchmarks | 166 | 12 |
trimmer_ops.py in tensorflow_text/python/ops | 138 | 11 |
fast_wordpiece_tokenizer.cc in tensorflow_text/core/kernels | 439 | 10 |
bert_tokenizer.py in tensorflow_text/python/ops | 128 | 10 |
masking_ops.py in tensorflow_text/python/ops | 117 | 9 |
There are 11 files with lines longer than 120 characters. In total, there are 12 long lines.
File | # lines | # units | # long lines |
---|---|---|---|
whitespace_tokenizer.py in tensorflow_text/python/ops | 72 | 4 | 2 |
sentence_breaking_ops.py in tensorflow_text/python/ops | 46 | 3 | 1 |
fast_wordpiece_tokenizer.py in tensorflow_text/python/ops | 107 | 4 | 1 |
state_based_sentence_breaker_op.py in tensorflow_text/python/ops | 54 | 2 | 1 |
viterbi_constrained_sequence_op.py in tensorflow_text/python/ops | 51 | 1 | 1 |
split_merge_tokenizer.py in tensorflow_text/python/ops | 83 | 3 | 1 |
unicode_script_tokenizer.py in tensorflow_text/python/ops | 92 | 5 | 1 |
split_merge_from_logits_tokenizer.py in tensorflow_text/python/ops | 40 | 3 | 1 |
greedy_constrained_sequence_op.py in tensorflow_text/python/ops | 51 | 1 | 1 |
sentencepiece_tokenizer.py in tensorflow_text/python/ops | 185 | 9 | 1 |
text_similarity_metric_ops.py in tensorflow_text/python/metrics | 28 | 1 | 1 |