Merge pull request #265 from fred913/main

feat(browser_use_tool): add 'get_text' action to browser use tool
2025-03-09 11:40:20 +08:00 · 2025-03-09 11:40:20 +08:00 · 7a9e22d093
commit 7a9e22d093
parent c71ee143e2 0d0f8ab233
1 changed files with 13 additions and 0 deletions
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -20,6 +20,8 @@ content extraction, and tab management. Supported actions include:
 - 'input_text': Input text into an element
 - 'screenshot': Capture a screenshot
 - 'get_html': Get page HTML content
+- 'get_text': Get text content of the page
+- 'read_links': Get all links on the page
 - 'execute_js': Execute JavaScript code
 - 'scroll': Scroll the page
 - 'switch_tab': Switch to a specific tab
@@ -43,6 +45,7 @@ class BrowserUseTool(BaseTool):
                    "input_text",
                    "screenshot",
                    "get_html",
+                    "get_text",
                    "execute_js",
                    "scroll",
                    "switch_tab",
@@ -180,6 +183,16 @@ class BrowserUseTool(BaseTool):
                    truncated = html[:2000] + "..." if len(html) > 2000 else html
                    return ToolResult(output=truncated)

+                elif action == "get_text":
+                    text = await context.execute_javascript("document.body.innerText")
+                    return ToolResult(output=text)
+
+                elif action == "read_links":
+                    links = await context.execute_javascript(
+                        "document.querySelectorAll('a[href]').forEach((elem) => {if (elem.innerText) {console.log(elem.innerText, elem.href)}})"
+                    )
+                    return ToolResult(output=links)
+
                elif action == "execute_js":
                    if not script:
                        return ToolResult(