From 6b77a9944822fd9a4b3b500bc04cb0afd2ba4957 Mon Sep 17 00:00:00 2001
From: Sheng Fan
Date: Sat, 8 Mar 2025 16:45:18 +0800
Subject: [PATCH] feat(browser_use_tool): add 'get_text' action to browser use tool

---
 app/tool/browser_use_tool.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index 62f12a5..bd4afcc 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -11,7 +11,6 @@ from pydantic_core.core_schema import ValidationInfo
 
 from app.tool.base import BaseTool, ToolResult
 
-
 _BROWSER_DESCRIPTION = """
 Interact with a web browser to perform various actions such as navigation, element interaction,
 content extraction, and tab management. Supported actions include:
@@ -20,6 +19,7 @@ content extraction, and tab management. Supported actions include:
 - 'input_text': Input text into an element
 - 'screenshot': Capture a screenshot
 - 'get_html': Get page HTML content
+- 'get_text': Get text content of the page
 - 'execute_js': Execute JavaScript code
 - 'scroll': Scroll the page
 - 'switch_tab': Switch to a specific tab
@@ -43,6 +43,7 @@ class BrowserUseTool(BaseTool):
                     "input_text",
                     "screenshot",
                     "get_html",
+                    "get_text",
                     "execute_js",
                     "scroll",
                     "switch_tab",
@@ -171,15 +172,21 @@ class BrowserUseTool(BaseTool):
                 elif action == "screenshot":
                     screenshot = await context.take_screenshot(full_page=True)
                     return ToolResult(
-                        output=f"Screenshot captured (base64 length: {len(screenshot)})",
+                        output=
+                        f"Screenshot captured (base64 length: {len(screenshot)})",
                         system=screenshot,
                     )
 
                 elif action == "get_html":
                     html = await context.get_page_html()
-                    truncated = html[:2000] + "..." if len(html) > 2000 else html
+                    truncated = html[:2000] + "..." if len(
+                        html) > 2000 else html
                     return ToolResult(output=truncated)
 
+                elif action == "get_text":
+                    text = await context.execute_javascript('document.body.innerText')
+                    return ToolResult(output=text)
+
                 elif action == "execute_js":
                     if not script:
                         return ToolResult(