in Sources/Models/LanguageModel.swift [112:137]
/// Runs one forward pass of the Core ML model and returns the scores (logits)
/// for the token position following the last real input token.
///
/// The input is truncated to at most `maxContextLength` tokens and right-padded
/// with `config.padTokenId` (or `0`) up to `minContextLength` when shorter.
/// NOTE(review): truncation keeps the *first* `maxTokens` tokens; for
/// autoregressive generation the most recent suffix is usually what should be
/// kept once the prompt exceeds the context window — confirm intended behavior.
///
/// - Parameters:
///   - tokens: Prompt token ids; only the first `maxContextLength` are used.
///   - config: Generation configuration; `padTokenId` supplies the pad value.
/// - Returns: The logits slice `[0, maxTokens - 1]` of the model's `"logits"`
///   output, i.e. the unnormalized scores for the next token.
func predictNextTokenScores(_ tokens: InputTokens, config: GenerationConfig) -> any MLShapedArrayProtocol {
    // TODO: exceptions — the `try!` calls below crash on prediction failure
    // because this signature is non-throwing; converting to `throws` would
    // break existing callers.
    // Truncate to the context window, then pad up to the minimum length.
    let maxTokens = min(tokens.count, maxContextLength)
    let padLength = maxTokens >= minContextLength ? 0 : minContextLength - maxTokens
    let inputTokens = Array(tokens[0..<maxTokens]) + Array(repeating: config.padTokenId ?? 0, count: padLength)
    let inputIds = MLShapedArray<Int32>(scalars: inputTokens.map { Int32($0) }, shape: inputIdsShape)
    var inputDictionary = [inputIdsName: MLFeatureValue(shapedArray: inputIds)]
    if requiresAttention {
        // Attend to real tokens (1); mask out the padding (0).
        let mask = Array(repeating: 1, count: maxTokens) + Array(repeating: 0, count: padLength)
        let attentionMask = MLShapedArray<Int32>(scalars: mask.map { Int32($0) }, shape: inputIdsShape)
        inputDictionary[attention_mask] = MLFeatureValue(shapedArray: attentionMask)
    }
    let input = try! MLDictionaryFeatureProvider(dictionary: inputDictionary)
    let output = try! model.prediction(from: input)

    // TODO: maybe try to support models with "token_scores" too (after the softmax)
    // Fetch the logits output by name. `featureNames` is a Set, so the previous
    // `featureNames.first!` was order-dependent and could pick the wrong output
    // for multi-output models; the debug-only `assert` also disappeared in
    // release builds. This guard is checked in all build configurations.
    guard let scores = output.featureValue(for: "logits")?.shapedArrayValue(of: Float.self) else {
        fatalError("Model prediction does not contain a Float \"logits\" output feature")
    }
    let nextTokenScores = scores[0, maxTokens - 1]
    return nextTokenScores
}