in HuggingChat-Mac/LocalLLM/ModelManager.swift [235:282]
/// Runs LLM generation for `prompt`, streaming partial output into `outputText`.
///
/// Appends the user prompt to `messages`, tokenizes the conversation with the
/// model's chat template, and generates up to `maxTokens` tokens, publishing
/// `outputText` every `displayEveryNTokens` tokens. The final output is
/// recorded back into `messages`. No-ops if a generation is already running
/// or the model container/config has not finished loading.
///
/// - Parameter prompt: The user's message to append and respond to.
func generate(prompt: String) async {
    guard !running else { return }
    // Bind once up front instead of force-unwrapping repeatedly below.
    guard let container = globalContainer, let config = globalConfig else { return }

    running = true
    // Clear the flag on every exit path, including a thrown error.
    defer { running = false }

    self.outputText = ""
    do {
        messages.append(["role": "user", "content": prompt])
        let promptTokens = try await container.perform { _, tokenizer in
            try tokenizer.applyChatTemplate(messages: messages)
        }
        // Millisecond-resolution seed so each run samples differently.
        MLXRandom.seed(UInt64(Date.timeIntervalSinceReferenceDate * 1000))
        let result = await container.perform { model, tokenizer in
            MLXLLM.generate(
                promptTokens: promptTokens, parameters: generateParameters, model: model,
                tokenizer: tokenizer, extraEOSTokens: config.extraEOSTokens
            ) { tokens in
                // Throttle UI updates: decode and publish only every N tokens.
                if tokens.count % displayEveryNTokens == 0 {
                    let text = tokenizer.decode(tokens: tokens)
                    Task { @MainActor in
                        self.outputText = text
                    }
                }
                if tokens.count >= maxTokens {
                    return .stop
                } else {
                    return .more
                }
            }
        }
        // Publish the final decoded text if the throttled updates missed it.
        if result.output != self.outputText {
            self.outputText = result.output
        }
        // Always record the reply — previously this append was inside the
        // inequality check above, so the assistant turn was dropped whenever
        // the streamed text already matched the final output. Also use the
        // "assistant" role expected by chat templates for model turns
        // (was "system", which mislabels the reply in later
        // applyChatTemplate calls).
        messages.append(["role": "assistant", "content": result.output])
    } catch {
        self.loadState = .error(error.localizedDescription)
        // outputText = "Failed: \(error.localizedDescription)"
    }
}