in Sources/TransformersCLI/main.swift [67:83]
/// Runs the command: prepares the model located at `modelPath`, loads it, and then
/// invokes `generate` for `prompt` twice — a warm-up pass followed by the actual run.
///
/// - Throws: Any error propagated from `compile(at:)` or `LanguageModel.loadCompiled`.
func run() throws {
    let compiledModelURL = try compile(at: URL(filePath: modelPath))
    print("Loading model \(compiledModelURL)")
    let languageModel = try LanguageModel.loadCompiled(
        url: compiledModelURL,
        computeUnits: computeUnits.asMLComputeUnits
    )

    // Start from the model's default generation settings, switch sampling off
    // (greedy generation for now) and take the new-token limit from `maxLength`.
    var generationConfig = languageModel.defaultGenerationConfig
    generationConfig.doSample = false
    generationConfig.maxNewTokens = maxLength

    // Warm-up pass: same model, settings and prompt, but with `printOutput: false`.
    print("Warming up...")
    generate(model: languageModel, config: generationConfig, prompt: prompt, printOutput: false)

    // Actual run: called without `printOutput`, so `generate` uses its own default.
    print("Generating")
    generate(model: languageModel, config: generationConfig, prompt: prompt)
}