Sources/Models/LanguageModel.swift (4 lines):
- line 44: // TODO: support a set of fixed shapes (keeping the first one here)
- line 113: // TODO: exceptions
- line 131: // TODO: maybe try to support models with "token_scores" too (after the softmax)
- line 206: // TODO: retrieve from the json: https://huggingface.co/nlpcloud/instruct-gpt-j-fp16/blob/main/config.json#L26

Sources/Generation/Generation.swift (3 lines):
- line 30: // TODO: callbacks (for streaming)
- line 40: // TODO: additional stopping criteria
- line 55: // TODO: additional stopping criteria

Sources/Tokenizers/Tokenizer.swift (2 lines):
- line 79: // TODO: support lstrip, rstrip, normalized, etc.
- line 321: // TODO: specialTokens are stored but never used

Sources/Tokenizers/Normalizer.swift (2 lines):
- line 230: // TODO: use `precompiledCharsmap` (base64-encoded string) from the configuration
- line 234: // TODO: This is a simplified implementation.

Sources/Tokenizers/Trie.swift (1 line):
- line 78: // TODO: maybe store the scores here if it's helpful?

Sources/Tokenizers/Decoder.swift (1 line):
- line 38: // TODO: not sure if we need to include `addedTokens` in all the decoder initializers (and the protocol)

Sources/Tokenizers/BPETokenizer.swift (1 line):
- line 196: // TODO: if config.byte_fallback is False, append the unknown token instead

Sources/TensorUtils/Math.swift (1 line):
- line 162: // TODO: handle Double, etc.

Sources/Tokenizers/TokenLattice.swift (1 line):
- line 89: // TODO: the reference implementations have a few more clones here: verify

Sources/TensorUtils/MLMultiArray+Utils.swift (1 line):
- line 135: // TODO: use srcPtr instead of array subscripting.

Sources/Tokenizers/PreTokenizer.swift (1 line):
- line 157: // FIXME: (2b) always prepends, we are not passing section info