feat(browser_use_tool): add 'get_text' action to browser use tool

2025-03-08 16:45:18 +08:00 · 2025-03-08 16:45:18 +08:00 · 6b77a99448
commit 6b77a99448
parent e76055b436
1 changed file with 10 additions and 3 deletions
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -11,7 +11,6 @@ from pydantic_core.core_schema import ValidationInfo

 from app.tool.base import BaseTool, ToolResult

-
 _BROWSER_DESCRIPTION = """
 Interact with a web browser to perform various actions such as navigation, element interaction,
 content extraction, and tab management. Supported actions include:
@@ -20,6 +19,7 @@ content extraction, and tab management. Supported actions include:
 - 'input_text': Input text into an element
 - 'screenshot': Capture a screenshot
 - 'get_html': Get page HTML content
+- 'get_text': Get text content of the page
 - 'execute_js': Execute JavaScript code
 - 'scroll': Scroll the page
 - 'switch_tab': Switch to a specific tab
@@ -43,6 +43,7 @@ class BrowserUseTool(BaseTool):
                    "input_text",
                    "screenshot",
                    "get_html",
+                    "get_text",
                    "execute_js",
                    "scroll",
                    "switch_tab",
@@ -171,15 +172,21 @@ class BrowserUseTool(BaseTool):
                elif action == "screenshot":
                    screenshot = await context.take_screenshot(full_page=True)
                    return ToolResult(
-                        output=f"Screenshot captured (base64 length: {len(screenshot)})",
+                        output=
+                        f"Screenshot captured (base64 length: {len(screenshot)})",
                        system=screenshot,
                    )

                elif action == "get_html":
                    html = await context.get_page_html()
-                    truncated = html[:2000] + "..." if len(html) > 2000 else html
+                    truncated = html[:2000] + "..." if len(
+                        html) > 2000 else html
                    return ToolResult(output=truncated)

+                elif action == "get_text":
+                    text = await context.execute_javascript('document.body.innerText')
+                    return ToolResult(output=text)
+
                elif action == "execute_js":
                    if not script:
                        return ToolResult(