HuggingChat-Mac/Models/AccessibilityHelpers.swift (437 lines of code) (raw):
//
// AccessibilityHelpers.swift
// HuggingChat-Mac
//
// Created by Cyril Zakka on 9/11/24.
//
import Cocoa
import ApplicationServices
import UniformTypeIdentifiers
// MARK: VSCode Reader
/// State of a single VS Code window, decoded from the JSON body that
/// `VSCodeReader` fetches from a loopback HTTP port.
/// Every non-optional field must be present in the payload or decoding fails.
struct VSCodeWindow: Decodable {
/// Identifier of the window as reported in the payload.
let windowId: String
/// Compared across windows to pick the most recently focused one.
/// NOTE(review): unit/epoch is not visible in this file — confirm with the sender.
let lastFocusTime: TimeInterval
/// Text content reported for the window's editor.
let content: String
/// Selected text reported for the window's editor.
let selectedText: String
/// Language of the document, when the payload provides one.
let language: String?
/// File name of the document, when the payload provides one.
let fileName: String?
/// Timestamp carried by the payload, kept as a raw string.
/// NOTE(review): format is not visible in this file and the value is not read here.
let timestamp: String
/// Whether the payload reports this window as focused; a focused window is
/// preferred over any other when choosing the active editor.
let isFocused: Bool
}
/// Reads editor state from running VS Code windows.
///
/// Each window is expected to serve its state as JSON (see `VSCodeWindow`) on
/// one of the loopback ports in `portRange`. The serving side is not part of
/// this file — presumably a companion VS Code extension.
class VSCodeReader {
    static let shared = VSCodeReader()

    /// Loopback ports probed for VS Code windows, in ascending order.
    private let portRange = 54321...54330

    /// Upper bound for a single port probe. The endpoint is local, so a port
    /// that accepts the connection but never answers should not stall the
    /// whole read for URLSession's default 60 seconds.
    private let probeTimeout: TimeInterval = 2

    private init() {}

    /// The subset of a window's state that callers consume.
    struct VSCodeContent {
        let content: String
        let selectedText: String
        let language: String?
        let fileName: String?
    }

    /// Returns the content of the focused VS Code window, or of the most
    /// recently focused one when no window reports focus.
    ///
    /// Ports are probed once, in order. The first window reporting
    /// `isFocused` wins immediately; windows that do not are remembered so the
    /// fallback needs no second round of requests.
    ///
    /// - Returns: Content, selection, language and file name of the chosen window.
    /// - Throws: `AccessibilityError.noActiveWindow` when no port yields a
    ///   decodable window.
    func getActiveEditorContent() async throws -> VSCodeContent {
        var unfocusedWindows: [VSCodeWindow] = []

        for port in portRange {
            // A closed port, timeout or malformed payload just means
            // "no window here"; move on to the next port.
            guard let window = try? await getWindowContent(port: port) else {
                continue
            }
            if window.isFocused {
                return makeContent(from: window)
            }
            unfocusedWindows.append(window)
        }

        guard let mostRecent = unfocusedWindows.max(by: { $0.lastFocusTime < $1.lastFocusTime }) else {
            throw AccessibilityError.noActiveWindow
        }
        return makeContent(from: mostRecent)
    }

    /// Projects a decoded window onto the public `VSCodeContent` shape.
    private func makeContent(from window: VSCodeWindow) -> VSCodeContent {
        VSCodeContent(
            content: window.content,
            selectedText: window.selectedText,
            language: window.language,
            fileName: window.fileName
        )
    }

    /// Fetches and decodes the window state served on `port`.
    ///
    /// - Parameter port: Loopback TCP port to query.
    /// - Throws: `URLError` when the request fails or times out, or a
    ///   decoding error when the body is not a valid `VSCodeWindow`.
    private func getWindowContent(port: Int) async throws -> VSCodeWindow {
        guard let url = URL(string: "http://127.0.0.1:\(port)") else {
            throw URLError(.badURL)
        }
        // Editor state changes constantly, so never serve it from the URL cache.
        let request = URLRequest(
            url: url,
            cachePolicy: .reloadIgnoringLocalCacheData,
            timeoutInterval: probeTimeout
        )
        let (data, _) = try await URLSession.shared.data(for: request)
        return try JSONDecoder().decode(VSCodeWindow.self, from: data)
    }
}
/// Errors raised while reading content from another application.
enum AccessibilityError: Error {
/// No VS Code window could be read; thrown by `VSCodeReader`.
case noActiveWindow
/// NOTE(review): not thrown anywhere in this file's visible code — confirm it is still needed.
case extractionFailed
}
/// Reads text from the frontmost application's focused window through the
/// macOS Accessibility (AX) API, delegating to `VSCodeReader` for VS Code.
class AccessibilityContentReader {
    private enum TextExtractionError: Error {
        /// The element has children, but none of them yielded any text.
        case noTextFound
        /// The element's children could not be read.
        case invalidHierarchy
    }

    static let shared = AccessibilityContentReader()

    /// Bundle identifiers for which text extraction is attempted.
    private let supportedApps = [
        "com.apple.dt.Xcode",       // Xcode
        "com.googlecode.iterm2",    // iTerm2
        "com.apple.Terminal",       // Terminal
        "com.microsoft.VSCode"      // Visual Studio Code
    ]

    /// AX roles treated as text inputs when looking for values and selections.
    private let textInputRoles: Set<String> = ["AXTextArea", "AXTextField", "AXTextInput", "AXComboBox"]

    private init() {
        checkAccessibilityPermissions()
    }

    /// Snapshot of what could be read from the frontmost application.
    struct EditorContent {
        let fullText: String
        let selectedText: String?
        let applicationName: String?
        let bundleIdentifier: String?
        let applicationIcon: NSImage?

        /// `true` when the application is one of the reader's supported apps.
        var isSupported: Bool {
            guard let bundleId = bundleIdentifier else { return false }
            return AccessibilityContentReader.shared.supportedApps.contains(bundleId)
        }
    }

    /// Asks the system whether this process is trusted for accessibility
    /// (passing the prompt option) and logs a warning when it is not.
    private func checkAccessibilityPermissions() {
        let options = [kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String: true]
        let trusted = AXIsProcessTrustedWithOptions(options as CFDictionary)
        if !trusted {
            NSLog("⚠️ Accessibility permissions not granted. Content reading features will not work.")
        }
    }

    /// Reads the content of the frontmost application's focused window.
    ///
    /// - Returns: `nil` when there is no frontmost application, its focused
    ///   window cannot be obtained, or extraction for a supported app fails.
    ///   For unsupported apps, an `EditorContent` with empty text that still
    ///   carries the application's name, bundle identifier and icon.
    func getActiveEditorContent() async -> EditorContent? {
        guard let app = NSWorkspace.shared.frontmostApplication,
              let windowElement = getFocusedWindow(for: app) else {
            return nil
        }
        let icon = getApplicationIcon(for: app)

        guard let bundleId = app.bundleIdentifier, supportedApps.contains(bundleId) else {
            return EditorContent(
                fullText: "",
                selectedText: nil,
                applicationName: app.localizedName,
                bundleIdentifier: app.bundleIdentifier,
                applicationIcon: icon
            )
        }

        do {
            // For VSCode, use the VSCodeReader instead of the AX tree.
            if bundleId == "com.microsoft.VSCode" {
                let vsContent = try await VSCodeReader.shared.getActiveEditorContent()
                return EditorContent(
                    fullText: vsContent.content,
                    selectedText: vsContent.selectedText,
                    applicationName: app.localizedName,
                    bundleIdentifier: bundleId,
                    applicationIcon: icon
                )
            }

            // Use the bundle identifier captured above: re-reading the
            // frontmost application here could pick the extractor for a
            // different app if focus changes while this runs.
            async let fullText = extractTextForApp(bundleId: bundleId, from: windowElement)
            async let selectedText = getSelectedText(from: windowElement)
            return EditorContent(
                fullText: try await fullText,
                selectedText: await selectedText,
                applicationName: app.localizedName,
                bundleIdentifier: bundleId,
                applicationIcon: icon
            )
        } catch {
            print("Error getting editor content: \(error)")
            return nil
        }
    }

    /// Resolves an icon for `app`: the running application's own icon, then
    /// the icon of its bundle on disk, then the generic application icon.
    private func getApplicationIcon(for app: NSRunningApplication) -> NSImage? {
        if let icon = app.icon {
            return icon
        }
        if let bundleIdentifier = app.bundleIdentifier,
           let bundleURL = NSWorkspace.shared.urlForApplication(withBundleIdentifier: bundleIdentifier) {
            return NSWorkspace.shared.icon(forFile: bundleURL.path)
        }
        return NSWorkspace.shared.icon(for: UTType.application)
    }

    /// Returns the AX element of `app`'s focused window, if it has one.
    private func getFocusedWindow(for app: NSRunningApplication) -> AXUIElement? {
        let appRef = AXUIElementCreateApplication(app.processIdentifier)
        return elementAttribute(kAXFocusedWindowAttribute, of: appRef)
    }

    /// Depth-first search for the first non-empty selected text among the
    /// text-input elements in `element`'s subtree, including `element` itself.
    private func getSelectedText(from element: AXUIElement) -> String? {
        if isTextInputElement(element),
           let text = stringAttribute(kAXSelectedTextAttribute, of: element),
           !text.isEmpty {
            return text
        }
        guard let childElements = children(of: element) else {
            return nil
        }
        for child in childElements {
            if let selected = getSelectedText(from: child) {
                return selected
            }
        }
        return nil
    }

    /// `true` when the element's role is one of `textInputRoles`.
    private func isTextInputElement(_ element: AXUIElement) -> Bool {
        guard let role = getRole(of: element) else {
            return false
        }
        return textInputRoles.contains(role)
    }

    /// Selects the extraction strategy for the given bundle identifier.
    private func extractTextForApp(bundleId: String, from element: AXUIElement) async throws -> String {
        switch bundleId {
        case "com.apple.dt.Xcode":
            return try extractTextFromXcode(element)
        default:
            return try extractTextGeneric(element)
        }
    }

    /// Returns the value of the first non-empty `AXTextArea` whose parent is
    /// an `AXScrollArea`, searching depth-first; `""` when none is found.
    private func extractTextFromXcode(_ element: AXUIElement) throws -> String {
        if getRole(of: element) == "AXTextArea",
           let parent = getParentElement(of: element),
           getRole(of: parent) == "AXScrollArea",
           let value = getValue(from: element),
           !value.isEmpty {
            return value
        }
        guard let childElements = children(of: element) else {
            return ""
        }
        for child in childElements {
            let result = try extractTextFromXcode(child)
            if !result.isEmpty {
                return result
            }
        }
        return ""
    }

    /// Returns the element's own value when it is a non-empty text input,
    /// otherwise the combined text of its descendants.
    private func extractTextGeneric(_ element: AXUIElement) throws -> String {
        if isTextInputElement(element),
           let text = getValue(from: element),
           !text.isEmpty {
            return text
        }
        return try searchChildrenForText(element)
    }

    /// Joins the text of every child that yields some with single spaces and
    /// trims surrounding whitespace.
    ///
    /// - Throws: `TextExtractionError.invalidHierarchy` when children cannot
    ///   be read, `TextExtractionError.noTextFound` when no child yields text.
    private func searchChildrenForText(_ element: AXUIElement) throws -> String {
        guard let childElements = children(of: element) else {
            throw TextExtractionError.invalidHierarchy
        }
        // A child that throws simply contributes nothing.
        let childTexts = childElements.compactMap { try? extractTextGeneric($0) }
        guard !childTexts.isEmpty else {
            throw TextExtractionError.noTextFound
        }
        return childTexts
            .joined(separator: " ")
            .trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// The element's `AXValue` as a string, if it has one.
    private func getValue(from element: AXUIElement) -> String? {
        stringAttribute(kAXValueAttribute, of: element)
    }

    /// The element's parent, if it has one.
    private func getParentElement(of element: AXUIElement) -> AXUIElement? {
        elementAttribute(kAXParentAttribute, of: element)
    }

    /// The element's role string, if it has one.
    private func getRole(of element: AXUIElement) -> String? {
        stringAttribute(kAXRoleAttribute, of: element)
    }

    // MARK: - AX attribute helpers

    /// Copies a string-valued attribute; `nil` when the call fails or the
    /// value is not a string.
    private func stringAttribute(_ attribute: String, of element: AXUIElement) -> String? {
        var rawValue: CFTypeRef?
        guard AXUIElementCopyAttributeValue(element, attribute as CFString, &rawValue) == .success else {
            return nil
        }
        return rawValue as? String
    }

    /// Copies an element-valued attribute; `nil` when the call fails, yields
    /// no value, or yields something that is not an `AXUIElement`.
    private func elementAttribute(_ attribute: String, of element: AXUIElement) -> AXUIElement? {
        var rawValue: CFTypeRef?
        guard AXUIElementCopyAttributeValue(element, attribute as CFString, &rawValue) == .success,
              let value = rawValue,
              CFGetTypeID(value) == AXUIElementGetTypeID() else {
            return nil
        }
        // `as?` cannot be used with CoreFoundation types; the type-ID check
        // above is what makes this forced cast safe.
        return (value as! AXUIElement)
    }

    /// Copies the element's children; `nil` when they cannot be read.
    private func children(of element: AXUIElement) -> [AXUIElement]? {
        var rawValue: CFTypeRef?
        guard AXUIElementCopyAttributeValue(element, kAXChildrenAttribute as CFString, &rawValue) == .success else {
            return nil
        }
        return rawValue as? [AXUIElement]
    }
}
/// Inserts text into whatever text field currently has keyboard focus,
/// first by simulating a paste and then, if that fails, through the
/// Accessibility (AX) API.
class AccessibilityTextPaster {
    /// Singleton instance for shared access
    static let shared = AccessibilityTextPaster()

    /// Virtual key code posted for the "V" key of the paste shortcut.
    private let pasteKeyCode: CGKeyCode = 0x09

    /// How long to wait after posting Cmd+V before restoring the pasteboard,
    /// so the target application has a chance to read the pasted text.
    private let pasteSettleInterval: TimeInterval = 0.1

    private init() {
        // Check for accessibility permissions
        checkAccessibilityPermissions()
    }

    // MARK: - Public Interface

    /// Paste text to the currently focused text field
    /// - Parameter text: The text to paste
    /// - Returns: Bool indicating success
    @discardableResult
    func pasteText(_ text: String) -> Bool {
        // First try using the pasteboard - this is the most reliable method
        if pasteThroughPasteboard(text) {
            return true
        }
        // Fall back to accessibility API if pasteboard method fails
        return pasteThroughAccessibility(text)
    }

    /// Get the currently active application
    /// - Returns: NSRunningApplication? of the active app
    func getCurrentApplication() -> NSRunningApplication? {
        return NSWorkspace.shared.frontmostApplication
    }

    /// Get the bundle identifier of the currently active application
    /// - Returns: String? containing the bundle identifier
    func getCurrentApplicationBundleIdentifier() -> String? {
        return getCurrentApplication()?.bundleIdentifier
    }

    /// Get the localized name of the currently active application
    /// - Returns: String? containing the application name
    func getCurrentApplicationName() -> String? {
        return getCurrentApplication()?.localizedName
    }

    // MARK: - Private Methods

    /// Asks the system whether this process is trusted for accessibility
    /// (passing the prompt option) and logs a warning when it is not.
    private func checkAccessibilityPermissions() {
        let options = [kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String: true]
        let trusted = AXIsProcessTrustedWithOptions(options as CFDictionary)
        if !trusted {
            NSLog("⚠️ Accessibility permissions not granted. Some features may not work.")
        }
    }

    /// Puts `text` on the general pasteboard, posts Cmd+V, then restores what
    /// was on the pasteboard before.
    ///
    /// Every item and every flavor is backed up, not just the plain string,
    /// so images, files and rich text survive the paste. The previous
    /// contents are also restored when the key events cannot be posted.
    ///
    /// - Returns: `true` when the Cmd+V events were posted.
    private func pasteThroughPasteboard(_ text: String) -> Bool {
        let pasteboard = NSPasteboard.general
        let savedItems = snapshotItems(of: pasteboard)

        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)

        let posted = simulateCommandV()
        if posted {
            // Wait a brief moment to ensure the paste completes before the
            // pasted text is removed from the pasteboard again.
            Thread.sleep(forTimeInterval: pasteSettleInterval)
        }

        // Restore old contents if there were any.
        if !savedItems.isEmpty {
            pasteboard.clearContents()
            pasteboard.writeObjects(savedItems)
        }
        return posted
    }

    /// Deep-copies the pasteboard's current items. Copies are required
    /// because an item that already belongs to a pasteboard cannot be written
    /// back once the pasteboard has been cleared.
    private func snapshotItems(of pasteboard: NSPasteboard) -> [NSPasteboardItem] {
        let currentItems = pasteboard.pasteboardItems ?? []
        return currentItems.map { original in
            let copy = NSPasteboardItem()
            for type in original.types {
                if let data = original.data(forType: type) {
                    copy.setData(data, forType: type)
                }
            }
            return copy
        }
    }

    /// Writes `text` into the focused text element through the AX API.
    ///
    /// Insertion at the selection is attempted first because, like a real
    /// paste, it keeps the field's existing content; replacing the whole
    /// value is the last resort.
    private func pasteThroughAccessibility(_ text: String) -> Bool {
        guard let focusedElement = getFocusedTextElement() else {
            return false
        }
        if insertTextThroughAccessibility(text, in: focusedElement) {
            return true
        }
        return setValueDirectly(text, for: focusedElement)
    }

    /// Returns the system-wide focused element when it is a text element,
    /// otherwise the first text element found in its subtree.
    private func getFocusedTextElement() -> AXUIElement? {
        let systemWide = AXUIElementCreateSystemWide()
        var rawValue: CFTypeRef?
        guard AXUIElementCopyAttributeValue(systemWide,
                                            kAXFocusedUIElementAttribute as CFString,
                                            &rawValue) == .success,
              let value = rawValue,
              CFGetTypeID(value) == AXUIElementGetTypeID() else {
            return nil
        }
        // `as?` cannot be used with CoreFoundation types; the type-ID check
        // above is what makes this forced cast safe.
        let element = value as! AXUIElement

        // Check if element is directly a text element
        if isTextElement(element) {
            return element
        }
        // Search children for text element
        return findTextElement(in: element)
    }

    /// `true` when the element's role is a text field, text area or combo box.
    private func isTextElement(_ element: AXUIElement) -> Bool {
        var role: CFTypeRef?
        guard AXUIElementCopyAttributeValue(element,
                                            kAXRoleAttribute as CFString,
                                            &role) == .success,
              let roleString = role as? String else {
            return false
        }
        return ["AXTextField", "AXTextArea", "AXComboBox"].contains(roleString)
    }

    /// Depth-first search for the first text element below `element`.
    private func findTextElement(in element: AXUIElement) -> AXUIElement? {
        var children: CFTypeRef?
        guard AXUIElementCopyAttributeValue(element,
                                            kAXChildrenAttribute as CFString,
                                            &children) == .success,
              let childrenArray = children as? [AXUIElement] else {
            return nil
        }
        for child in childrenArray {
            if isTextElement(child) {
                return child
            }
            if let found = findTextElement(in: child) {
                return found
            }
        }
        return nil
    }

    /// Replaces the element's entire value with `text`.
    private func setValueDirectly(_ text: String, for element: AXUIElement) -> Bool {
        let result = AXUIElementSetAttributeValue(element,
                                                  kAXValueAttribute as CFString,
                                                  text as CFTypeRef)
        return result == .success
    }

    /// Inserts `text` at the element's current selection by setting its
    /// selected-text attribute, which replaces the selection (or inserts at
    /// the caret when nothing is selected).
    ///
    /// AX actions take no arguments, so performing an action cannot carry the
    /// text to insert; the attribute write is what actually delivers it.
    private func insertTextThroughAccessibility(_ text: String, in element: AXUIElement) -> Bool {
        let result = AXUIElementSetAttributeValue(element,
                                                  kAXSelectedTextAttribute as CFString,
                                                  text as CFTypeRef)
        return result == .success
    }

    /// Posts a Cmd+V key-down/key-up pair to the HID event tap.
    ///
    /// - Returns: `false` when the event source or the events cannot be
    ///   created; `true` once both events have been posted.
    private func simulateCommandV() -> Bool {
        guard let source = CGEventSource(stateID: .hidSystemState),
              let keyDown = CGEvent(keyboardEventSource: source, virtualKey: pasteKeyCode, keyDown: true),
              let keyUp = CGEvent(keyboardEventSource: source, virtualKey: pasteKeyCode, keyDown: false) else {
            return false
        }
        keyDown.flags = .maskCommand
        keyUp.flags = .maskCommand
        keyDown.post(tap: .cghidEventTap)
        keyUp.post(tap: .cghidEventTap)
        return true
    }
}