Sources/Hub/BinaryDistinct.swift (178 lines of code) (raw):

//
//  BinaryDistinct.swift
//  swift-transformers
//
//  Created by Piotr Kowalczuk on 06.03.25.
//

import Foundation

/// A string wrapper that stores and compares its content as raw UTF-16 code units.
///
/// `BinaryDistinctString` works around limitations of both `String` and `NSString`:
/// `String` compares using Unicode canonical equivalence, while `NSString` is not `Sendable`.
/// Equality, hashing and ordering of this type are based purely on `value`.
/// For more reference see
/// [Modifying-and-Comparing-Strings](https://developer.apple.com/documentation/swift/string#Modifying-and-Comparing-Strings).
public struct BinaryDistinctString: Equatable, Hashable, Sendable, Comparable, CustomStringConvertible, ExpressibleByStringLiteral {
    /// The UTF-16 code units that make up the string.
    public let value: [UInt16]

    /// The content as an `NSString`, built from the stored UTF-16 code units.
    public var nsString: NSString {
        String(utf16CodeUnits: value, count: value.count) as NSString
    }

    /// The content as a Swift `String`.
    public var string: String {
        String(nsString)
    }

    /// The number of `Character`s in `string`.
    ///
    /// This is the unit used by `startIndex`, `endIndex`, `index(_:offsetBy:)` and the range subscripts.
    public var count: Int {
        string.count
    }

    /// Satisfies ``CustomStringConvertible`` protocol.
    public var description: String {
        string
    }

    /// Creates a string from raw UTF-16 code units.
    public init(_ bytes: [UInt16]) {
        value = bytes
    }

    /// Creates a string from the UTF-16 code units of an `NSString`.
    public init(_ str: NSString) {
        value = Array((str as String).utf16)
    }

    /// Creates a string from the UTF-16 code units of a Swift `String`.
    public init(_ str: String) {
        value = Array(str.utf16)
    }

    /// Creates a string that consists of a single character.
    public init(_ character: BinaryDistinctCharacter) {
        value = character.bytes
    }

    /// Creates a string by concatenating the code units of `characters` in order.
    public init(_ characters: [BinaryDistinctCharacter]) {
        value = characters.flatMap(\.bytes)
    }

    /// Satisfies ``ExpressibleByStringLiteral`` protocol.
    public init(stringLiteral value: String) {
        self.init(value)
    }

    /// Two strings are equal when their UTF-16 code units are identical.
    public static func == (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> Bool {
        lhs.value == rhs.value
    }

    /// Orders strings lexicographically by their UTF-16 code units.
    public static func < (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> Bool {
        lhs.value.lexicographicallyPrecedes(rhs.value)
    }

    /// Concatenates the code units of both operands.
    public static func + (lhs: BinaryDistinctString, rhs: BinaryDistinctString) -> BinaryDistinctString {
        BinaryDistinctString(lhs.value + rhs.value)
    }

    /// Returns `true` when the code units of `prefix` are the leading code units of this string.
    public func hasPrefix(_ prefix: BinaryDistinctString) -> Bool {
        guard prefix.value.count <= value.count else { return false }
        return value.starts(with: prefix.value)
    }

    /// Returns `true` when the code units of `suffix` are the trailing code units of this string.
    public func hasSuffix(_ suffix: BinaryDistinctString) -> Bool {
        guard suffix.value.count <= value.count else { return false }
        return value.suffix(suffix.value.count) == suffix.value
    }

    /// Returns a lowercased copy, computed with `String.lowercased()`.
    public func lowercased() -> BinaryDistinctString {
        .init(string.lowercased())
    }

    /// Returns a copy in which every occurrence of `of` is replaced by `with`.
    ///
    /// The search uses `.literal` comparison so that only code-unit-identical occurrences match;
    /// the default Foundation search would also match canonically equivalent sequences, which
    /// contradicts the binary-distinct contract of this type.
    public func replacingOccurrences(of: Self, with: Self) -> BinaryDistinctString {
        BinaryDistinctString(
            string.replacingOccurrences(of: of.string, with: with.string, options: .literal)
        )
    }
}

// MARK: - Integer indexing

public extension BinaryDistinctString {
    /// Indices are plain integers counting `Character`s from the start of the string.
    typealias Index = Int

    /// The position of the first character; always `0`.
    var startIndex: Index { 0 }

    /// The position one past the last character; equal to `count`.
    var endIndex: Index { count }

    /// Returns `i` moved by `distance`, trapping when the result falls outside `0...count`.
    func index(_ i: Index, offsetBy distance: Int) -> Index {
        let newIndex = i + distance
        guard newIndex >= 0, newIndex <= count else {
            fatalError("Index out of bounds")
        }
        return newIndex
    }

    /// Returns `i` moved by `distance`, or `nil` when that would step past `limit`.
    ///
    /// - Moving forward (`distance >= 0`): the result must not exceed `limit`.
    /// - Moving backward (`distance < 0`): a `limit` at or before `i` is a lower bound;
    ///   a `limit` after `i` has no effect.
    func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? {
        let newIndex = i + distance
        if distance < 0 {
            // A backward move can only cross a limit that lies at or before the start position.
            return (limit <= i && newIndex < limit) ? nil : newIndex
        }
        return newIndex <= limit ? newIndex : nil
    }
}

// MARK: - Sequence

extension BinaryDistinctString: Sequence {
    /// Iterates over the `Character`s of `string`, each wrapped as a `BinaryDistinctCharacter`.
    public func makeIterator() -> AnyIterator<BinaryDistinctCharacter> {
        var iterator = string.makeIterator() // Use native Swift String iterator
        return AnyIterator {
            guard let char = iterator.next() else { return nil }
            return BinaryDistinctCharacter(char)
        }
    }
}

// MARK: - Slicing

public extension BinaryDistinctString {
    /// Returns the characters from `bounds.lowerBound` up to the end of the string.
    subscript(bounds: PartialRangeFrom<Int>) -> BinaryDistinctString {
        // The upper bound is expressed in characters, the same unit the range subscript validates against.
        self[bounds.lowerBound..<count]
    }

    /// Returns the characters at the given positions.
    ///
    /// Positions count `Character`s, consistent with `count` and `endIndex`, so a slice never
    /// cuts through a grapheme cluster. Traps when `bounds` is not within `0...count`.
    subscript(bounds: Range<Int>) -> BinaryDistinctString {
        let text = string
        guard bounds.lowerBound >= 0, bounds.upperBound <= text.count else {
            fatalError("Index out of bounds")
        }

        // Translate character positions into offsets within the UTF-16 storage.
        let lower = text.index(text.startIndex, offsetBy: bounds.lowerBound)
        let upper = text.index(lower, offsetBy: bounds.count)
        let startOffset = lower.utf16Offset(in: text)
        let endOffset = upper.utf16Offset(in: text)

        return BinaryDistinctString(Array(value[startOffset..<endOffset]))
    }
}

// MARK: - Dictionary helpers

public extension Dictionary where Key == BinaryDistinctString {
    /// Merges another `BinaryDistinctString`-keyed dictionary into this one.
    ///
    /// - Parameters:
    ///   - other: The dictionary to merge in.
    ///   - strategy: Picks the value to keep when a key exists on both sides; defaults to the new value.
    mutating func merge(_ other: [BinaryDistinctString: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        merge(other, uniquingKeysWith: strategy)
    }

    /// Merges a `[String: Value]` dictionary into this one, converting its keys.
    ///
    /// - Parameters:
    ///   - other: The dictionary to merge in.
    ///   - strategy: Picks the value to keep when a key exists on both sides; defaults to the new value.
    mutating func merge(_ other: [String: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        merge(other.map { (BinaryDistinctString($0.key), $0.value) }, uniquingKeysWith: strategy)
    }

    /// Merges a `[NSString: Value]` dictionary into this one, converting its keys.
    ///
    /// - Parameters:
    ///   - other: The dictionary to merge in.
    ///   - strategy: Picks the value to keep when a key exists on both sides; defaults to the new value.
    mutating func merge(_ other: [NSString: Value], strategy: (Value, Value) -> Value = { _, new in new }) {
        merge(other.map { (BinaryDistinctString($0.key), $0.value) }, uniquingKeysWith: strategy)
    }

    /// Returns a copy of this dictionary with `other` merged in.
    func merging(_ other: [String: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var newDict = self
        newDict.merge(other, strategy: strategy)
        return newDict
    }

    /// Returns a copy of this dictionary with `other` merged in.
    func merging(_ other: [BinaryDistinctString: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var newDict = self
        newDict.merge(other, strategy: strategy)
        return newDict
    }

    /// Returns a copy of this dictionary with `other` merged in.
    func merging(_ other: [NSString: Value], strategy: (Value, Value) -> Value = { _, new in new }) -> Self {
        var newDict = self
        newDict.merge(other, strategy: strategy)
        return newDict
    }
}

// MARK: - StringConvertible

/// Marker protocol for the string-like types in this file that can be created from a string literal.
public protocol StringConvertible: ExpressibleByStringLiteral { }

extension BinaryDistinctString: StringConvertible { }
extension String: StringConvertible { }
extension NSString: StringConvertible { }

// MARK: - BinaryDistinctCharacter

/// A single element of a `BinaryDistinctString`, stored as UTF-16 code units.
public struct BinaryDistinctCharacter: Equatable, Hashable, CustomStringConvertible, ExpressibleByStringLiteral {
    /// The UTF-16 code units of the character.
    let bytes: [UInt16]

    /// Creates a character from the UTF-16 code units of a Swift `Character`.
    public init(_ character: Character) {
        bytes = Array(character.utf16)
    }

    /// Creates a character from the UTF-16 code units of `string`.
    public init(_ string: String) {
        bytes = Array(string.utf16)
    }

    /// Creates a character from the UTF-16 code units of `nsString`.
    public init(_ nsString: NSString) {
        bytes = Array((nsString as String).utf16)
    }

    /// Creates a character from raw UTF-16 code units.
    public init(bytes: [UInt16]) {
        self.bytes = bytes
    }

    /// Satisfies ``ExpressibleByStringLiteral`` protocol.
    public init(stringLiteral value: String) {
        self.init(value)
    }

    /// The character rendered as a `String` built from its UTF-16 code units.
    var stringValue: String? {
        String(utf16CodeUnits: bytes, count: bytes.count)
    }

    /// A debug-style description that shows the rendered character and its code units in hex.
    public var description: String {
        if let str = stringValue {
            "BinaryDistinctCharacter('\(str)', bytes: \(bytes.map { String(format: "0x%02X", $0) }))"
        } else {
            "BinaryDistinctCharacter(invalid UTF-8, bytes: \(bytes.map { String(format: "0x%02X", $0) }))"
        }
    }

    /// Two characters are equal when their UTF-16 code units are identical.
    public static func == (lhs: BinaryDistinctCharacter, rhs: BinaryDistinctCharacter) -> Bool {
        lhs.bytes == rhs.bytes
    }
}