Sources/GoogleAI/ModelContent.swift (126 lines of code) (raw):

// Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. import Foundation /// A type describing data in media formats interpretable by an AI model. Each generative AI /// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value /// may comprise multiple heterogeneous ``ModelContent/Part``s. @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *) public struct ModelContent: Equatable { /// A discrete piece of data in a media format intepretable by an AI model. Within a single value /// of ``Part``, different data types may not mix. public enum Part: Equatable { /// Text value. case text(String) /// Data with a specified media type. /// /// > Note: Supported media types depends on the model; see /// > [supported file /// > formats](https://ai.google.dev/tutorials/prompting_with_media#supported_file_formats) /// > for details. case data(mimetype: String, Data) /// URI-based data with a specified media type. /// /// > Important: Files must be uploaded using the /// > [`media.upload` REST API](https://ai.google.dev/api/rest/v1beta/media/upload) or another /// > Gemini SDK. /// /// > Note: Supported media types depends on the model; see /// > [supported file /// > formats](https://ai.google.dev/tutorials/prompting_with_media#supported_file_formats) /// > for details. case fileData(mimetype: String, uri: String) /// A predicted function call returned from the model. case functionCall(FunctionCall) /// A response to a function call. case functionResponse(FunctionResponse) /// Code generated by the model that is meant to be executed. case executableCode(ExecutableCode) /// Result of executing the ``ExecutableCode``. case codeExecutionResult(CodeExecutionResult) // MARK: Convenience Initializers /// Convenience function for populating a Part with JPEG data. public static func jpeg(_ data: Data) -> Self { return .data(mimetype: "image/jpeg", data) } /// Convenience function for populating a Part with PNG data. public static func png(_ data: Data) -> Self { return .data(mimetype: "image/png", data) } /// Returns the text contents of this ``Part``, if it contains text. public var text: String? { switch self { case let .text(contents): return contents default: return nil } } } /// The role of the entity creating the ``ModelContent``. For user-generated client requests, /// for example, the role is `user`. public let role: String? /// The data parts comprising this ``ModelContent`` value. public let parts: [Part] /// Creates a new value from any data or `Array` of data interpretable as a /// ``Part``. See ``ThrowingPartsRepresentable`` for types that can be interpreted as `Part`s. public init(role: String? = "user", parts: some ThrowingPartsRepresentable) throws { self.role = role try self.parts = parts.tryPartsValue() } /// Creates a new value from any data or `Array` of data interpretable as a /// ``Part``. See ``ThrowingPartsRepresentable`` for types that can be interpreted as `Part`s. public init(role: String? = "user", parts: some PartsRepresentable) { self.role = role self.parts = parts.partsValue } /// Creates a new value from a list of ``Part``s. public init(role: String? = "user", parts: [Part]) { self.role = role self.parts = parts } /// Creates a new value from any data interpretable as a ``Part``. See /// ``ThrowingPartsRepresentable`` /// for types that can be interpreted as `Part`s. public init(role: String? = "user", _ parts: any ThrowingPartsRepresentable...) throws { let content = try parts.flatMap { try $0.tryPartsValue() } self.init(role: role, parts: content) } /// Creates a new value from any data interpretable as a ``Part``. See /// ``ThrowingPartsRepresentable`` /// for types that can be interpreted as `Part`s. public init(role: String? = "user", _ parts: [PartsRepresentable]) { let content = parts.flatMap { $0.partsValue } self.init(role: role, parts: content) } } // MARK: Codable Conformances @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *) extension ModelContent: Codable {} @available(iOS 15.0, macOS 11.0, macCatalyst 15.0, *) extension ModelContent.Part: Codable { enum CodingKeys: String, CodingKey { case text case inlineData case fileData case functionCall case functionResponse case executableCode case codeExecutionResult } enum InlineDataKeys: String, CodingKey { case mimeType = "mime_type" case bytes = "data" } enum FileDataKeys: String, CodingKey { case mimeType = "mime_type" case url = "file_uri" } public func encode(to encoder: Encoder) throws { var container = encoder.container(keyedBy: CodingKeys.self) switch self { case let .text(a0): try container.encode(a0, forKey: .text) case let .data(mimetype, bytes): var inlineDataContainer = container.nestedContainer( keyedBy: InlineDataKeys.self, forKey: .inlineData ) try inlineDataContainer.encode(mimetype, forKey: .mimeType) try inlineDataContainer.encode(bytes, forKey: .bytes) case let .fileData(mimetype: mimetype, url): var fileDataContainer = container.nestedContainer( keyedBy: FileDataKeys.self, forKey: .fileData ) try fileDataContainer.encode(mimetype, forKey: .mimeType) try fileDataContainer.encode(url, forKey: .url) case let .functionCall(functionCall): try container.encode(functionCall, forKey: .functionCall) case let .functionResponse(functionResponse): try container.encode(functionResponse, forKey: .functionResponse) case let .executableCode(executableCode): try container.encode(executableCode, forKey: .executableCode) case let .codeExecutionResult(codeExecutionResult): try container.encode(codeExecutionResult, forKey: .codeExecutionResult) } } public init(from decoder: Decoder) throws { let values = try decoder.container(keyedBy: CodingKeys.self) if values.contains(.text) { self = try .text(values.decode(String.self, forKey: .text)) } else if values.contains(.inlineData) { let dataContainer = try values.nestedContainer( keyedBy: InlineDataKeys.self, forKey: .inlineData ) let mimetype = try dataContainer.decode(String.self, forKey: .mimeType) let bytes = try dataContainer.decode(Data.self, forKey: .bytes) self = .data(mimetype: mimetype, bytes) } else if values.contains(.functionCall) { self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall)) } else if values.contains(.executableCode) { self = try .executableCode(values.decode(ExecutableCode.self, forKey: .executableCode)) } else if values.contains(.codeExecutionResult) { self = try .codeExecutionResult(values.decode( CodeExecutionResult.self, forKey: .codeExecutionResult )) } else { throw DecodingError.dataCorrupted(.init( codingPath: [CodingKeys.text, CodingKeys.inlineData], debugDescription: "No text, inline data or function call was found." )) } } }