simple_cua_loop.py (69 lines of code) (raw):

from computers import Computer
from computers import LocalPlaywrightComputer
from utils import create_response, check_blocklisted_url


def acknowledge_safety_check_callback(message: str) -> bool:
    """Ask the operator on stdin whether to proceed past a safety check.

    Args:
        message: Text of the safety check; it is embedded in the prompt.

    Returns:
        True only when the reply, lowercased and stripped of surrounding
        whitespace, is exactly ``"y"``; any other reply returns False.
    """
    response = input(
        f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    ).lower()
    return response.strip() == "y"


def handle_item(item, computer: Computer):
    """Handle each item; may cause a computer action + screenshot.

    Args:
        item: One entry of the model response's ``"output"`` list (a dict
            with at least a ``"type"`` key).
        computer: Environment the action is dispatched to. The action's
            ``"type"`` is used as the method name and its remaining keys
            as keyword arguments.

    Returns:
        A one-element list holding a ``computer_call_output`` dict when the
        item is a ``computer_call``; an empty list for every other item.

    Raises:
        ValueError: If the operator declines a pending safety check. The
            action is NOT executed in that case.
    """
    item_type = item["type"]

    if item_type == "message":  # print messages
        content = item.get("content") or []
        if content:
            first_part = content[0]
            # A refusal part carries "refusal" instead of "text" (per the
            # Responses API reference); fall back to it rather than raising.
            print(first_part.get("text", first_part.get("refusal", "")))
        return []

    if item_type != "computer_call":
        return []

    # perform computer actions
    action = item["action"]
    action_type = action["type"]
    action_args = {k: v for k, v in action.items() if k != "type"}
    print(f"{action_type}({action_args})")

    # Ask for acknowledgement BEFORE acting, so that declining a safety check
    # actually prevents the action instead of failing after it already ran.
    pending_checks = item.get("pending_safety_checks", [])
    for check in pending_checks:
        if not acknowledge_safety_check_callback(check["message"]):
            raise ValueError(f"Safety check failed: {check['message']}")

    # give our computer environment action to perform
    getattr(computer, action_type)(**action_args)

    screenshot_base64 = computer.screenshot()

    # return value informs model of the latest screenshot
    call_output = {
        "type": "computer_call_output",
        "call_id": item["call_id"],
        "acknowledged_safety_checks": pending_checks,
        "output": {
            "type": "input_image",
            "image_url": f"data:image/png;base64,{screenshot_base64}",
        },
    }

    # additional URL safety checks for browser environments
    if computer.get_environment() == "browser":
        current_url = computer.get_current_url()
        call_output["output"]["current_url"] = current_url
        check_blocklisted_url(current_url)

    return [call_output]


def main():
    """Run the CUA (Computer Use Assistant) loop, using Local Playwright.

    Reads a prompt from stdin, then keeps calling the model and feeding back
    the results of ``handle_item`` until the last conversation item has role
    ``"assistant"``; then prompts again. The loop ends when stdin is closed
    (EOF) or the user interrupts at the prompt.

    Raises:
        ValueError: If a model response has no ``"output"`` key, or if a
            safety check is declined inside ``handle_item``.
    """
    with LocalPlaywrightComputer() as computer:
        dimensions = computer.get_dimensions()
        tools = [
            {
                "type": "computer-preview",
                "display_width": dimensions[0],
                "display_height": dimensions[1],
                "environment": computer.get_environment(),
            }
        ]

        items = []
        while True:  # get user input until EOF / interrupt at the prompt
            try:
                user_input = input("> ")
            except (EOFError, KeyboardInterrupt):
                # Leave through the `with` block so the computer is closed
                # normally instead of unwinding with a traceback.
                print()
                break
            items.append({"role": "user", "content": user_input})

            while True:  # keep looping until we get a final response
                response = create_response(
                    model="computer-use-preview",
                    input=items,
                    tools=tools,
                    truncation="auto",
                )

                if "output" not in response:
                    print(response)
                    raise ValueError("No output from model")

                items += response["output"]

                for item in response["output"]:
                    items += handle_item(item, computer)

                if items[-1].get("role") == "assistant":
                    break


if __name__ == "__main__":
    main()