src/datatrove/pipeline/filters/gopher_quality_filter.py (1 line):
  - line 56: self.max_non_alpha_words_ratio = max_non_alpha_words_ratio # TODO rename to min_alpha_words_ratio
src/datatrove/pipeline/tokens/context_shuffler.py (1 line):
  - line 75: # TODO: replace mmap implementation which only works locally
src/datatrove/pipeline/dedup/bloom_filter.py (1 line):
  - line 93: # TODO: Add support for 64-bit
src/datatrove/pipeline/readers/ipc.py (1 line):
  - line 66: # TODO: add option to disable reading metadata (https://github.com/apache/arrow/issues/13827 needs to be addressed first)
src/datatrove/pipeline/dedup/exact_substrings.py (1 line):
  - line 302: # TODO improve
src/datatrove/utils/perplexity.py (1 line):
  - line 155: # TODO: integrate these options to simplify_text