in HuggingSnap/Views/VLMEvaluator.swift [90:130]
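/// Loads the model container: on first call, configures GPU memory limits, fetches the
/// runtime configuration and model weights, and caches the result in `loadState`;
/// subsequent calls return the cached container.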
func load() async throws -> ModelContainer {
    switch loadState {
    case .idle:
        // limit the buffer cache
        MLX.GPU.set(cacheLimit: 20 * 1024 * 1024)
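        // (MLX keeps freed buffers up to this size for reuse rather than returning
        // them to the OS; a small cache keeps the app's memory footprint down)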
        // This may make things very slow when way over the limit
        // TODO: make this dependent on device + max number of frames
        let maxMetalMemory = Int(round(0.82 * Double(os_proc_available_memory())))
        MLX.GPU.set(memoryLimit: maxMetalMemory, relaxed: false)
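        // With relaxed: false, an allocation that would still exceed the limit after
        // waiting on scheduled work fails with an error instead of spilling to swap.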
        // Load runtime configuration
        // TODO: use a fallback if we can't download - ideally the one from the previous run
        // A fine-grained read-only token for the HuggingFaceTB org could be supplied to HubApi here
        let hubApi = HubApi()
        let config = try await loadConfiguration(hub: hubApi)
        runtimeConfiguration = config
        let modelConfiguration = ModelConfiguration(id: config.model, defaultPrompt: config.photoUserPrompt)
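        // Fetch the model weights (or reuse a cached copy), reporting download progress.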
        let modelContainer = try await VLMModelFactory.shared.loadContainer(
            hub: hubApi, configuration: modelConfiguration
        ) { progress in
            Task { @MainActor in
                self.modelInfo = "Downloading model: \(Int(progress.fractionCompleted * 100))%"
            }
        }
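        // Querying the parameter count touches the loaded model once; the value is
        // discarded, so this mainly confirms the container is usable before we report success.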
        _ = await modelContainer.perform { context in
            context.model.numParameters()
        }
        self.modelInfo = "Finished loading."
        loadState = .loaded(modelContainer)
        return modelContainer

    case .loaded(let modelContainer):
        return modelContainer
    }
}
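
A minimal call-site sketch, assuming an instance of this evaluator named `viewModel` (a hypothetical name, not from this file):

Task {
    // First call configures the GPU, downloads the weights, and caches the container.
    let container = try await viewModel.load()
    // Subsequent calls hit the .loaded case and return the same container immediately.
    let same = try await viewModel.load()
    assert(container === same)
}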