in spring-ai-alibaba-graph/spring-ai-alibaba-graph-example/src/main/java/com/alibaba/cloud/ai/example/graph/openmanus/tool/BrowserUseTool.java [180:344]
/**
 * Executes a single browser action described by a JSON tool input.
 *
 * <p>
 * The input is parsed into a map and the following keys are read: {@code action}
 * (required), {@code url}, {@code index}, {@code text}, {@code script},
 * {@code scroll_amount} and {@code tab_id}. Supported actions are {@code navigate},
 * {@code click}, {@code input_text}, {@code key_enter}, {@code screenshot},
 * {@code get_html}, {@code get_text}, {@code execute_js}, {@code scroll},
 * {@code new_tab}, {@code close_tab}, {@code switch_tab} and {@code refresh}.
 *
 * <p>
 * Missing or malformed parameters and failures raised while performing the action
 * are reported through the returned result's message rather than thrown. JSON
 * parsing itself happens outside the guarded section, so a parser exception still
 * propagates to the caller.
 * @param toolInput JSON object text describing the action and its parameters
 * @return a result whose message describes the outcome or the failure
 */
public ToolExecuteResult run(String toolInput) {
    log.info("BrowserUseTool toolInput:{}", toolInput);
    Map<String, Object> toolInputMap = JSON.parseObject(toolInput, new TypeReference<Map<String, Object>>() {
    });
    // NOTE(review): the parser presumably yields null for empty input; guard so the
    // lookups below cannot fail with a NullPointerException.
    if (toolInputMap == null) {
        return new ToolExecuteResult("Tool input must be a JSON object containing an 'action' field");
    }
    String action = readStringArg(toolInputMap, "action");
    if (action == null) {
        // switch on a null String would throw; report the real problem instead.
        return new ToolExecuteResult("Action is required");
    }
    try {
        // Parameter conversion is inside the try block so that a badly typed value
        // becomes a failure result instead of an exception for the caller.
        String url = readStringArg(toolInputMap, "url");
        Integer index = readIntegerArg(toolInputMap, "index");
        String text = readStringArg(toolInputMap, "text");
        String script = readStringArg(toolInputMap, "script");
        Integer scrollAmount = readIntegerArg(toolInputMap, "scroll_amount");
        Integer tabId = readIntegerArg(toolInputMap, "tab_id");

        switch (action) {
            case "navigate":
                if (url == null) {
                    return new ToolExecuteResult("URL is required for 'navigate' action");
                }
                driver.get(url);
                return new ToolExecuteResult("Navigated to " + url);
            case "click":
                if (index == null) {
                    return new ToolExecuteResult("Index is required for 'click' action");
                }
                // The index addresses the list of every element on the page.
                List<WebElement> elements = driver.findElements(By.cssSelector("*"));
                if (index < 0 || index >= elements.size()) {
                    return new ToolExecuteResult("Element with index " + index + " not found");
                }
                elements.get(index).click();
                return new ToolExecuteResult("Clicked element at index " + index);
            case "input_text":
                if (index == null || text == null) {
                    return new ToolExecuteResult("Index and text are required for 'input_text' action");
                }
                // Here the index addresses only input and textarea elements.
                List<WebElement> textTargets = driver.findElements(By.cssSelector("input, textarea"));
                if (index < 0 || index >= textTargets.size()) {
                    return new ToolExecuteResult("Input element with index " + index + " not found");
                }
                textTargets.get(index).sendKeys(text);
                return new ToolExecuteResult("Successfully input '" + text + "' into element at index " + index);
            case "key_enter":
                if (index == null) {
                    return new ToolExecuteResult("Index are required for 'key_enter' action");
                }
                List<WebElement> enterTargets = driver.findElements(By.cssSelector("input, textarea"));
                if (index < 0 || index >= enterTargets.size()) {
                    return new ToolExecuteResult("Input element with index " + index + " not found");
                }
                enterTargets.get(index).sendKeys(Keys.RETURN);
                return new ToolExecuteResult("Hit the enter key at index " + index);
            case "screenshot":
                TakesScreenshot screenshot = (TakesScreenshot) driver;
                String base64Screenshot = screenshot.getScreenshotAs(OutputType.BASE64);
                // Only the length is reported; the image data itself is not returned.
                return new ToolExecuteResult(
                        "Screenshot captured (base64 length: " + base64Screenshot.length() + ")");
            case "get_html":
                String html = driver.getPageSource();
                // Truncate long pages to MAX_LENGTH characters.
                return new ToolExecuteResult(
                        html.length() > MAX_LENGTH ? html.substring(0, MAX_LENGTH) + "..." : html);
            case "get_text":
                // Notice text checked for below; while the page body contains it, the
                // read is retried up to 5 times with a 10 second pause between tries.
                final String blockedNotice = "我们的系统检测到您的计算机网络中存在异常流量";
                String body = driver.findElement(By.tagName("body")).getText();
                log.info("get_text body is {}", body);
                int retries = 0;
                while (body != null && body.contains(blockedNotice) && retries < 5) {
                    retries++;
                    Thread.sleep(10000);
                    body = driver.findElement(By.tagName("body")).getText();
                    log.info("retry {} get_text body is {}", retries, body);
                }
                return new ToolExecuteResult(body);
            case "execute_js":
                if (script == null) {
                    return new ToolExecuteResult("Script is required for 'execute_js' action");
                }
                Object result = ((JavascriptExecutor) driver).executeScript(script);
                if (result == null) {
                    return new ToolExecuteResult("Successfully executed JavaScript code.");
                }
                return new ToolExecuteResult(result.toString());
            case "scroll":
                if (scrollAmount == null) {
                    return new ToolExecuteResult("Scroll amount is required for 'scroll' action");
                }
                ((JavascriptExecutor) driver).executeScript("window.scrollBy(0," + scrollAmount + ");");
                String direction = scrollAmount > 0 ? "down" : "up";
                return new ToolExecuteResult(
                        "Scrolled " + direction + " by " + Math.abs(scrollAmount) + " pixels");
            case "new_tab":
                if (url == null) {
                    return new ToolExecuteResult("URL is required for 'new_tab' action");
                }
                // Pass the URL as a script argument rather than splicing it into the
                // JavaScript source, so quotes in the URL cannot break or alter the script.
                ((JavascriptExecutor) driver).executeScript("window.open(arguments[0], '_blank');", url);
                return new ToolExecuteResult("Opened new tab with URL " + url);
            case "close_tab":
                driver.close();
                return new ToolExecuteResult("Closed current tab");
            case "switch_tab":
                if (tabId == null) {
                    return new ToolExecuteResult("Tab ID is required for 'switch_tab' action");
                }
                Object[] windowHandles = driver.getWindowHandles().toArray();
                if (tabId < 0 || tabId >= windowHandles.length) {
                    return new ToolExecuteResult("Tab ID " + tabId + " is out of range for 'switch_tab' action");
                }
                driver.switchTo().window(windowHandles[tabId].toString());
                return new ToolExecuteResult("Switched to tab " + tabId);
            case "refresh":
                driver.navigate().refresh();
                return new ToolExecuteResult("Refreshed current page");
            default:
                return new ToolExecuteResult("Unknown action: " + action);
        }
    }
    catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
        }
        if (e instanceof ElementNotInteractableException) {
            String errorMessage = String.format(
                    """
                    Browser action '%s' failed, mostly like to have used the wrong index argument.
                    You can try to use 'get_html' to get and analyze the page HTML content first and then use other actions to find the right input element.
                    Tips for :
                    1. ignore all the hidden input or textarea elements.
                    2. for baidu engine, you can use js script to do the operation
                    detailed exception message:
                    %s
                    """,
                    action, e.getMessage());
            return new ToolExecuteResult(errorMessage);
        }
        return new ToolExecuteResult("Browser action '" + action + "' failed: " + e.getMessage());
    }
}

/**
 * Reads an optional parameter as text; non-string values are converted with
 * {@code toString()}.
 * @return the text value, or {@code null} when the key is absent or null
 */
private static String readStringArg(Map<String, Object> args, String key) {
    Object value = args.get(key);
    return value == null ? null : value.toString();
}

/**
 * Reads an optional parameter as an integer. Any {@link Number} is narrowed with
 * {@code intValue()}; other values are parsed from their trimmed text form.
 * @return the integer value, or {@code null} when the key is absent or null
 * @throws IllegalArgumentException if the value cannot be read as an integer
 */
private static Integer readIntegerArg(Map<String, Object> args, String key) {
    Object value = args.get(key);
    if (value == null) {
        return null;
    }
    if (value instanceof Number) {
        return ((Number) value).intValue();
    }
    try {
        return Integer.valueOf(value.toString().trim());
    }
    catch (NumberFormatException e) {
        throw new IllegalArgumentException(
                "Parameter '" + key + "' must be an integer but was '" + value + "'", e);
    }
}