depth-anything-example/DepthApp/DataModel.swift

import CoreImage
import CoreML
import SwiftUI
import os

fileprivate let targetSize = CGSize(width: 518, height: 392)

final class DataModel: ObservableObject {
    let camera = Camera()
    let context = CIContext()

    /// The depth model.
    var model: DepthAnythingV2SmallF16?

    /// A pixel buffer used as input to the model.
    let inputPixelBuffer: CVPixelBuffer

    /// The last image captured from the camera.
    var lastImage = OSAllocatedUnfairLock<CIImage?>(uncheckedState: nil)

    /// The resulting depth image.
    @Published var depthImage: Image?

    init() {
        // Create a reusable buffer to avoid allocating memory for every model invocation.
        var buffer: CVPixelBuffer!
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            Int(targetSize.width),
            Int(targetSize.height),
            kCVPixelFormatType_32ARGB,
            nil,
            &buffer
        )
        guard status == kCVReturnSuccess else {
            fatalError("Failed to create pixel buffer")
        }
        inputPixelBuffer = buffer

        // Decouple running the model from the camera feed since the model will run slower.
        Task.detached(priority: .userInitiated) {
            await self.runModel()
        }

        Task {
            await self.handleCameraFeed()
        }
    }

    func handleCameraFeed() async {
        let imageStream = camera.previewStream
        for await image in imageStream {
            lastImage.withLock({ $0 = image })
        }
    }

    func runModel() async {
        try! loadModel()

        let clock = ContinuousClock()
        var durations = [ContinuousClock.Duration]()

        while !Task.isCancelled {
            let image = lastImage.withLock({ $0 })
            if let pixelBuffer = image?.pixelBuffer {
                let duration = await clock.measure {
                    try? await performInference(pixelBuffer)
                }
                durations.append(duration)
            }

            // Log the average runtime over every batch of 100 inferences.
            let measureInterval = 100
            if durations.count == measureInterval {
                let total = durations.reduce(Duration(secondsComponent: 0, attosecondsComponent: 0), +)
                let average = total / measureInterval
                print("Average model runtime: \(average.formatted(.units(allowed: [.milliseconds])))")
                durations.removeAll(keepingCapacity: true)
            }

            // Slow down inference to prevent freezing the UI.
            try? await Task.sleep(for: .milliseconds(10))
        }
    }

    func loadModel() throws {
        print("Loading model...")

        let clock = ContinuousClock()
        let start = clock.now

        model = try DepthAnythingV2SmallF16()

        let duration = clock.now - start
        print("Model loaded (took \(duration.formatted(.units(allowed: [.seconds, .milliseconds]))))")
    }

    func performInference(_ pixelBuffer: CVPixelBuffer) async throws {
        guard let model else {
            return
        }

        // Resize the camera frame to the model's input size, run the model,
        // then scale the depth map back up to the original frame size.
        let originalSize = CGSize(
            width: CVPixelBufferGetWidth(pixelBuffer),
            height: CVPixelBufferGetHeight(pixelBuffer)
        )
        let inputImage = CIImage(cvPixelBuffer: pixelBuffer).resized(to: targetSize)
        context.render(inputImage, to: inputPixelBuffer)
        let result = try model.prediction(image: inputPixelBuffer)
        let outputImage = CIImage(cvPixelBuffer: result.depth)
            .resized(to: originalSize)
            .image

        Task { @MainActor in
            self.depthImage = outputImage
        }
    }
}

fileprivate extension CIImage {
    /// Renders the image to a `CGImage` and wraps it in a SwiftUI `Image`.
    var image: Image? {
        let ciContext = CIContext()
        guard let cgImage = ciContext.createCGImage(self, from: self.extent) else { return nil }
        return Image(decorative: cgImage, scale: 1, orientation: .up)
    }
}
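
// NOTE: `Camera` (an AVFoundation wrapper exposing `previewStream`), the
// Xcode-generated `DepthAnythingV2SmallF16` model class, and the
// `resized(to:)` helper used above are defined elsewhere in the sample
// project. The extension below is a minimal, hypothetical sketch of what
// `resized(to:)` could look like, assuming a plain affine scale is enough
// to fit frames to the model's 518x392 input; treat it as an illustration,
// not the project's actual implementation.
fileprivate extension CIImage {
    func resized(to size: CGSize) -> CIImage {
        // Scale the image so its extent matches the requested size.
        let scaleX = size.width / extent.width
        let scaleY = size.height / extent.height
        let scaled = transformed(by: CGAffineTransform(scaleX: scaleX, y: scaleY))
        // Re-anchor the scaled image at (0, 0) so `CIContext.render`
        // fills the destination pixel buffer from its origin.
        return scaled.transformed(by: CGAffineTransform(
            translationX: -scaled.extent.origin.x,
            y: -scaled.extent.origin.y
        ))
    }
}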