func load()

in HuggingSnap/Views/VLMEvaluator.swift [90:130]


    /// Loads the VLM model container, or returns the one cached from a prior call.
    ///
    /// On the first call (`loadState == .idle`) this configures MLX GPU memory
    /// limits, downloads the runtime configuration and the model weights from the
    /// Hub (reporting progress via `modelInfo`), caches the container in
    /// `loadState`, and returns it. Later calls return the cached container.
    ///
    /// - Returns: A ready-to-use `ModelContainer`.
    /// - Throws: Any error from downloading the runtime configuration or the model.
    /// - Note: (review) two concurrent first calls both observe `.idle` and would
    ///   start duplicate loads; consider an intermediate "loading" state if callers
    ///   can race here.
    func load() async throws -> ModelContainer {
        switch loadState {
        case .idle:
            // Limit the Metal buffer cache to 20 MB.
            MLX.GPU.set(cacheLimit: 20 * 1024 * 1024)

            // This may make things very slow when way over the limit
            // TODO: make this dependent on device + max number of frames
            let maxMetalMemory = Int(round(0.82 * Double(os_proc_available_memory())))
            MLX.GPU.set(memoryLimit: maxMetalMemory, relaxed: false)

            // Load runtime configuration
            // TODO: use a fallback if we can't download - ideally the one from the previous run
            let hubApi = HubApi()
            // Fix: reuse the single `hubApi` instance rather than constructing a
            // second `HubApi()` — both download phases should go through one client.
            let config = try await loadConfiguration(hub: hubApi)
            runtimeConfiguration = config

            let modelConfiguration = ModelConfiguration(
                id: config.model, defaultPrompt: config.photoUserPrompt)

            let modelContainer = try await VLMModelFactory.shared.loadContainer(
                hub: hubApi,
                configuration: modelConfiguration
            ) { [modelConfiguration] progress in
                // Progress callback may arrive off the main actor; hop before
                // touching UI-facing state.
                Task { @MainActor in
                    self.modelInfo =
                        "Downloading model: \(Int(progress.fractionCompleted * 100))%"
                }
            }

            // Evaluate the parameter count once, discarding the result —
            // presumably this forces the weights to materialize before first
            // inference. TODO(review): confirm that is the intent.
            let _ = await modelContainer.perform { context in
                context.model.numParameters()
            }

            self.modelInfo = "Finished loading."
            loadState = .loaded(modelContainer)
            return modelContainer

        case .loaded(let modelContainer):
            return modelContainer
        }
    }