computers/default/browserbase.py (91 lines of code) (raw):
import os
from typing import Tuple, Dict, List, Union, Optional
from playwright.sync_api import Browser, Page, BrowserContext, Error as PlaywrightError
from ..shared.base_playwright import BasePlaywrightComputer
from browserbase import Browserbase
from dotenv import load_dotenv
import base64
load_dotenv()
class BrowserbaseBrowser(BasePlaywrightComputer):
    """
    Browserbase is a headless browser platform that offers a remote browser API. You can use it to control thousands of browsers from anywhere.
    You can find more information about Browserbase at https://www.browserbase.com/computer-use or view our OpenAI CUA Quickstart at https://docs.browserbase.com/integrations/openai-cua/introduction.

    IMPORTANT: This Browserbase computer requires the use of the `goto` tool defined in playwright_with_custom_functions.py.
    Make sure to include this tool in your configuration when using the Browserbase computer.
    """

    def get_dimensions(self):
        """Return the configured viewport size as a ``(width, height)`` tuple."""
        return self.dimensions

    def __init__(
        self,
        width: int = 1024,
        height: int = 768,
        region: str = "us-west-2",
        proxy: bool = False,
        virtual_mouse: bool = True,
        ad_blocker: bool = False,
    ):
        """
        Initialize the Browserbase instance. Additional configuration options for features such as persistent cookies, ad blockers, file downloads and more can be found in the Browserbase API documentation: https://docs.browserbase.com/reference/api/create-a-session

        The API key and project id are read from the ``BROWSERBASE_API_KEY``
        and ``BROWSERBASE_PROJECT_ID`` environment variables. No remote
        session is created here; that happens in ``_get_browser_and_page``.

        Args:
            width (int): The width of the browser viewport. Default is 1024.
            height (int): The height of the browser viewport. Default is 768.
            region (str): The region for the Browserbase session. Default is "us-west-2". Pick a region close to you for better performance. https://docs.browserbase.com/guides/multi-region
            proxy (bool): Whether to use a proxy for the session. Default is False. Turn on proxies if your browsing is frequently interrupted. https://docs.browserbase.com/features/proxies
            virtual_mouse (bool): Whether to enable the virtual mouse cursor. Default is True.
            ad_blocker (bool): Whether to enable the built-in ad blocker. Default is False.
        """
        super().__init__()
        self.bb = Browserbase(api_key=os.getenv("BROWSERBASE_API_KEY"))
        self.project_id = os.getenv("BROWSERBASE_PROJECT_ID")
        # Populated by _get_browser_and_page once the remote session exists.
        self.session = None
        self.dimensions = (width, height)
        self.region = region
        self.proxy = proxy
        self.virtual_mouse = virtual_mouse
        self.ad_blocker = ad_blocker

    def _get_browser_and_page(self) -> Tuple[Browser, Page]:
        """
        Create a Browserbase session and connect to it.

        The session is created with the viewport, ad-blocker, region and
        proxy settings given to ``__init__``. After connecting over CDP, the
        first existing context and page are used, the optional virtual mouse
        cursor script is installed, and the page is navigated to Bing.

        Returns:
            Tuple[Browser, Page]: A tuple containing the connected browser and page objects.
        """
        # Create a session on Browserbase with specified parameters
        width, height = self.dimensions
        session_params = {
            "project_id": self.project_id,
            "browser_settings": {
                "viewport": {"width": width, "height": height},
                "blockAds": self.ad_blocker,
            },
            "region": self.region,
            "proxies": self.proxy,
        }
        self.session = self.bb.sessions.create(**session_params)

        # Print the live session URL
        print(
            f"Watch and control this browser live at https://www.browserbase.com/sessions/{self.session.id}"
        )

        # Connect to the remote session
        browser = self._playwright.chromium.connect_over_cdp(
            self.session.connect_url, timeout=60000
        )
        context = browser.contexts[0]

        # Track pages opened later (popups, new tabs) so the active page follows them
        context.on("page", self._handle_new_page)

        # Only add the init script if virtual_mouse is True
        if self.virtual_mouse:
            # NOTE: the script text is kept flush-left so the injected source
            # is exactly what it was before; it draws a fake cursor element
            # that follows "mousemove" events in the top frame.
            context.add_init_script(
                """
// Only run in the top frame
if (window.self === window.top) {
function initCursor() {
const CURSOR_ID = '__cursor__';
// Check if cursor element already exists
if (document.getElementById(CURSOR_ID)) return;
const cursor = document.createElement('div');
cursor.id = CURSOR_ID;
Object.assign(cursor.style, {
position: 'fixed',
top: '0px',
left: '0px',
width: '20px',
height: '20px',
backgroundImage: 'url("data:image/svg+xml;utf8,<svg xmlns=\\'http://www.w3.org/2000/svg\\' viewBox=\\'0 0 24 24\\' fill=\\'black\\' stroke=\\'white\\' stroke-width=\\'1\\' stroke-linejoin=\\'round\\' stroke-linecap=\\'round\\'><polygon points=\\'2,2 2,22 8,16 14,22 17,19 11,13 20,13\\'/></svg>")',
backgroundSize: 'cover',
pointerEvents: 'none',
zIndex: '99999',
transform: 'translate(-2px, -2px)',
});
document.body.appendChild(cursor);
document.addEventListener("mousemove", (e) => {
cursor.style.top = e.clientY + "px";
cursor.style.left = e.clientX + "px";
});
}
// Use requestAnimationFrame for early execution
requestAnimationFrame(function checkBody() {
if (document.body) {
initCursor();
} else {
requestAnimationFrame(checkBody);
}
});
}
"""
            )

        page = context.pages[0]
        page.on("close", self._handle_page_close)
        page.goto("https://bing.com")
        return browser, page

    def _handle_new_page(self, page: Page):
        """Handle the creation of a new page by making it the active page."""
        print("New page created")
        self._page = page
        page.on("close", self._handle_page_close)

    def _handle_page_close(self, page: Page):
        """
        Handle the closure of a page.

        If the closed page was the active one, switch to the last remaining
        page of the first browser context, or set the active page to ``None``
        when no pages are left.
        """
        print("Page closed")
        if self._page == page:
            if self._browser.contexts[0].pages:
                self._page = self._browser.contexts[0].pages[-1]
            else:
                print("Warning: All pages have been closed.")
                self._page = None

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Clean up resources when exiting the context manager.

        The page, browser and Playwright driver are released in that order.
        Each step runs even if an earlier one raises, so a failure while
        closing the page cannot leak the browser connection or the driver.
        Exceptions are not suppressed: a cleanup error still propagates.

        Args:
            exc_type: The type of the exception that caused the context to be exited.
            exc_val: The exception instance that caused the context to be exited.
            exc_tb: A traceback object encapsulating the call stack at the point where the exception occurred.
        """
        try:
            if self._page:
                self._page.close()
        finally:
            try:
                if self._browser:
                    self._browser.close()
            finally:
                try:
                    if self._playwright:
                        self._playwright.stop()
                finally:
                    # Always surface the replay link once a session was created.
                    if self.session:
                        print(
                            f"Session completed. View replay at https://browserbase.com/sessions/{self.session.id}"
                        )

    def screenshot(self) -> str:
        """
        Capture a screenshot of the current viewport using CDP.

        A dedicated CDP session is opened for the capture and detached
        afterwards, so repeated screenshots do not accumulate sessions on the
        remote browser. If the CDP path raises a Playwright error, the parent
        class's ``screenshot`` is used instead.

        Returns:
            str: A base64 encoded string of the screenshot.
        """
        cdp_session = None
        try:
            # Get CDP session from the page
            cdp_session = self._page.context.new_cdp_session(self._page)
            # Capture screenshot using CDP
            result = cdp_session.send(
                "Page.captureScreenshot", {"format": "png", "fromSurface": True}
            )
            return result["data"]
        except PlaywrightError as error:
            print(
                f"CDP screenshot failed, falling back to standard screenshot: {error}"
            )
            return super().screenshot()
        finally:
            if cdp_session is not None:
                try:
                    cdp_session.detach()
                except PlaywrightError:
                    # Best effort: the session may already be gone (e.g. the
                    # page closed); the screenshot result must not be lost.
                    pass