Sources/SparkConnect/ArrowArrayBuilder.swift (285 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. import Foundation /// @nodoc public protocol ArrowArrayHolderBuilder { func toHolder() throws -> ArrowArrayHolder func appendAny(_ val: Any?) } public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>: ArrowArrayHolderBuilder { let type: ArrowType let bufferBuilder: T public var length: UInt { return self.bufferBuilder.length } public var capacity: UInt { return self.bufferBuilder.capacity } public var nullCount: UInt { return self.bufferBuilder.nullCount } public var offset: UInt { return self.bufferBuilder.offset } fileprivate init(_ type: ArrowType) throws { self.type = type self.bufferBuilder = try T() } public func append(_ vals: T.ItemType?...) { for val in vals { self.bufferBuilder.append(val) } } public func append(_ vals: [T.ItemType?]) { for val in vals { self.bufferBuilder.append(val) } } public func append(_ val: T.ItemType?) { self.bufferBuilder.append(val) } public func appendAny(_ val: Any?) { self.bufferBuilder.append(val as? T.ItemType) } public func finish() throws -> ArrowArray<T.ItemType> { let buffers = self.bufferBuilder.finish() let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount) let array = try U(arrowData) return array } public func getStride() -> Int { return self.type.getStride() } public func toHolder() throws -> ArrowArrayHolder { return try ArrowArrayHolderImpl(self.finish()) } } public class NumberArrayBuilder<T>: ArrowArrayBuilder<FixedBufferBuilder<T>, FixedArray<T>> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.infoForNumericType(T.self))) } } public class StringArrayBuilder: ArrowArrayBuilder<VariableBufferBuilder<String>, StringArray> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.ArrowString)) } } public class BinaryArrayBuilder: ArrowArrayBuilder<VariableBufferBuilder<Data>, BinaryArray> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.ArrowBinary)) } } public class BoolArrayBuilder: ArrowArrayBuilder<BoolBufferBuilder, BoolArray> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.ArrowBool)) } } public class Date32ArrayBuilder: ArrowArrayBuilder<Date32BufferBuilder, Date32Array> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.ArrowDate32)) } } public class Date64ArrayBuilder: ArrowArrayBuilder<Date64BufferBuilder, Date64Array> { fileprivate convenience init() throws { try self.init(ArrowType(ArrowType.ArrowDate64)) } } public class Time32ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time32>, Time32Array> { fileprivate convenience init(_ unit: ArrowTime32Unit) throws { try self.init(ArrowTypeTime32(unit)) } } public class Time64ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time64>, Time64Array> { fileprivate convenience init(_ unit: ArrowTime64Unit) throws { try self.init(ArrowTypeTime64(unit)) } } public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> { let builders: [any ArrowArrayHolderBuilder] let fields: [ArrowField] public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws { self.fields = fields self.builders = builders try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) self.bufferBuilder.initializeTypeInfo(fields) } public init(_ fields: [ArrowField]) throws { self.fields = fields var builders = [any ArrowArrayHolderBuilder]() for field in fields { builders.append(try ArrowArrayBuilders.loadBuilder(arrowType: field.type)) } self.builders = builders try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) } public override func append(_ values: [Any?]?) { self.bufferBuilder.append(values) if let anyValues = values { for index in 0..<builders.count { self.builders[index].appendAny(anyValues[index]) } } else { for index in 0..<builders.count { self.builders[index].appendAny(nil) } } } public override func finish() throws -> StructArray { let buffers = self.bufferBuilder.finish() var childData = [ArrowData]() for builder in self.builders { childData.append(try builder.toHolder().array.arrowData) } let arrowData = try ArrowData( self.type, buffers: buffers, children: childData, nullCount: self.nullCount, length: self.length) let structArray = try StructArray(arrowData) return structArray } } public class ArrowArrayBuilders { public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity _ builderType: Any.Type ) throws -> ArrowArrayHolderBuilder { if builderType == Int8.self || builderType == Int8?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Int8> } else if builderType == Int16.self || builderType == Int16?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Int16> } else if builderType == Int32.self || builderType == Int32?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Int32> } else if builderType == Int64.self || builderType == Int64?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Int64> } else if builderType == Float.self || builderType == Float?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Float> } else if builderType == UInt8.self || builderType == UInt8?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt8> } else if builderType == UInt16.self || builderType == UInt16?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt16> } else if builderType == UInt32.self || builderType == UInt32?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt32> } else if builderType == UInt64.self || builderType == UInt64?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<UInt64> } else if builderType == Double.self || builderType == Double?.self { return try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder<Double> } else if builderType == String.self || builderType == String?.self { return try ArrowArrayBuilders.loadStringArrayBuilder() } else if builderType == Bool.self || builderType == Bool?.self { return try ArrowArrayBuilders.loadBoolArrayBuilder() } else if builderType == Date.self || builderType == Date?.self { return try ArrowArrayBuilders.loadDate64ArrayBuilder() } else { throw ArrowError.invalid("Invalid type for builder: \(builderType)") } } public static func isValidBuilderType<T>(_ type: T.Type) -> Bool { return type == Int8?.self || type == Int16?.self || type == Int32?.self || type == Int64?.self || type == UInt8?.self || type == UInt16?.self || type == UInt32?.self || type == UInt64?.self || type == String?.self || type == Double?.self || type == Float?.self || type == Date?.self || type == Bool?.self || type == Bool.self || type == Int8.self || type == Int16.self || type == Int32.self || type == Int64.self || type == UInt8.self || type == UInt16.self || type == UInt32.self || type == UInt64.self || type == String.self || type == Double.self || type == Float.self || type == Date.self } public static func loadStructArrayBuilderForType<T>(_ obj: T) throws -> StructArrayBuilder { let mirror = Mirror(reflecting: obj) var builders = [ArrowArrayHolderBuilder]() var fields = [ArrowField]() for (property, value) in mirror.children { guard let propertyName = property else { continue } let builderType = type(of: value) let arrowType = ArrowType(ArrowType.infoForType(builderType)) fields.append(ArrowField(propertyName, type: arrowType, isNullable: true)) builders.append(try loadBuilder(arrowType: arrowType)) } return try StructArrayBuilder(fields, builders: builders) } public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity arrowType: ArrowType ) throws -> ArrowArrayHolderBuilder { switch arrowType.id { case .uint8: return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt8> case .uint16: return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt16> case .uint32: return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt32> case .uint64: return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt64> case .int8: return try loadNumberArrayBuilder() as NumberArrayBuilder<Int8> case .int16: return try loadNumberArrayBuilder() as NumberArrayBuilder<Int16> case .int32: return try loadNumberArrayBuilder() as NumberArrayBuilder<Int32> case .int64: return try loadNumberArrayBuilder() as NumberArrayBuilder<Int64> case .double: return try loadNumberArrayBuilder() as NumberArrayBuilder<Double> case .float: return try loadNumberArrayBuilder() as NumberArrayBuilder<Float> case .string: return try StringArrayBuilder() case .boolean: return try BoolArrayBuilder() case .binary: return try BinaryArrayBuilder() case .date32: return try Date32ArrayBuilder() case .date64: return try Date64ArrayBuilder() case .time32: guard let timeType = arrowType as? ArrowTypeTime32 else { throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") } return try Time32ArrayBuilder(timeType.unit) case .time64: guard let timeType = arrowType as? ArrowTypeTime64 else { throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") } return try Time64ArrayBuilder(timeType.unit) default: throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") } } public static func loadNumberArrayBuilder<T>() throws -> NumberArrayBuilder<T> { let type = T.self if type == Int8.self { return try NumberArrayBuilder<T>() } else if type == Int16.self { return try NumberArrayBuilder<T>() } else if type == Int32.self { return try NumberArrayBuilder<T>() } else if type == Int64.self { return try NumberArrayBuilder<T>() } else if type == UInt8.self { return try NumberArrayBuilder<T>() } else if type == UInt16.self { return try NumberArrayBuilder<T>() } else if type == UInt32.self { return try NumberArrayBuilder<T>() } else if type == UInt64.self { return try NumberArrayBuilder<T>() } else if type == Float.self { return try NumberArrayBuilder<T>() } else if type == Double.self { return try NumberArrayBuilder<T>() } else { throw ArrowError.unknownType("Type is invalid for NumberArrayBuilder") } } public static func loadStringArrayBuilder() throws -> StringArrayBuilder { return try StringArrayBuilder() } public static func loadBoolArrayBuilder() throws -> BoolArrayBuilder { return try BoolArrayBuilder() } public static func loadDate32ArrayBuilder() throws -> Date32ArrayBuilder { return try Date32ArrayBuilder() } public static func loadDate64ArrayBuilder() throws -> Date64ArrayBuilder { return try Date64ArrayBuilder() } public static func loadBinaryArrayBuilder() throws -> BinaryArrayBuilder { return try BinaryArrayBuilder() } public static func loadTime32ArrayBuilder(_ unit: ArrowTime32Unit) throws -> Time32ArrayBuilder { return try Time32ArrayBuilder(unit) } public static func loadTime64ArrayBuilder(_ unit: ArrowTime64Unit) throws -> Time64ArrayBuilder { return try Time64ArrayBuilder(unit) } }