From 6b77a9944822fd9a4b3b500bc04cb0afd2ba4957 Mon Sep 17 00:00:00 2001
From: Sheng Fan <fredtools999@gmail.com>
Date: Sat, 8 Mar 2025 16:45:18 +0800
Subject: [PATCH 1/3] feat(browser_use_tool): add 'get_text' action to browser
 use tool

---
 app/tool/browser_use_tool.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index 62f12a5..bd4afcc 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -11,7 +11,6 @@ from pydantic_core.core_schema import ValidationInfo
 
 from app.tool.base import BaseTool, ToolResult
 
-
 _BROWSER_DESCRIPTION = """
 Interact with a web browser to perform various actions such as navigation, element interaction,
 content extraction, and tab management. Supported actions include:
@@ -20,6 +19,7 @@ content extraction, and tab management. Supported actions include:
 - 'input_text': Input text into an element
 - 'screenshot': Capture a screenshot
 - 'get_html': Get page HTML content
+- 'get_text': Get text content of the page
 - 'execute_js': Execute JavaScript code
 - 'scroll': Scroll the page
 - 'switch_tab': Switch to a specific tab
@@ -43,6 +43,7 @@ class BrowserUseTool(BaseTool):
                     "input_text",
                     "screenshot",
                     "get_html",
+                    "get_text",
                     "execute_js",
                     "scroll",
                     "switch_tab",
@@ -171,15 +172,21 @@ class BrowserUseTool(BaseTool):
                 elif action == "screenshot":
                     screenshot = await context.take_screenshot(full_page=True)
                     return ToolResult(
-                        output=f"Screenshot captured (base64 length: {len(screenshot)})",
+                        output=
+                        f"Screenshot captured (base64 length: {len(screenshot)})",
                         system=screenshot,
                     )
 
                 elif action == "get_html":
                     html = await context.get_page_html()
-                    truncated = html[:2000] + "..." if len(html) > 2000 else html
+                    truncated = html[:2000] + "..." if len(
+                        html) > 2000 else html
                     return ToolResult(output=truncated)
 
+                elif action == "get_text":
+                    text = await context.execute_javascript('document.body.innerText')
+                    return ToolResult(output=text)
+
                 elif action == "execute_js":
                     if not script:
                         return ToolResult(

From 7090490f75f9671fade64585ce3bb7b954e1c104 Mon Sep 17 00:00:00 2001
From: Sheng Fan <fredtools999@gmail.com>
Date: Sat, 8 Mar 2025 18:23:40 +0800
Subject: [PATCH 2/3] feat(browser_use_tool): add 'read_links' action

---
 app/tool/browser_use_tool.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index bd4afcc..981ef1c 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -20,6 +20,7 @@ content extraction, and tab management. Supported actions include:
 - 'screenshot': Capture a screenshot
 - 'get_html': Get page HTML content
 - 'get_text': Get text content of the page
+- 'read_links': Get all links on the page
 - 'execute_js': Execute JavaScript code
 - 'scroll': Scroll the page
 - 'switch_tab': Switch to a specific tab
@@ -187,6 +188,10 @@ class BrowserUseTool(BaseTool):
                     text = await context.execute_javascript('document.body.innerText')
                     return ToolResult(output=text)
 
+                elif action == "read_links":
+                    links = await context.execute_javascript("document.querySelectorAll('a[href]').forEach((elem) => {if (elem.innerText) {console.log(elem.innerText, elem.href)}})")
+                    return ToolResult(output=links)
+
                 elif action == "execute_js":
                     if not script:
                         return ToolResult(

From 0d0f8ab2330d2329bc2c0a8a8b002ebb9d8fcf1e Mon Sep 17 00:00:00 2001
From: Sheng Fan <fredtools999@gmail.com>
Date: Sun, 9 Mar 2025 11:30:41 +0800
Subject: [PATCH 3/3] chore(browser_use_tool): fix code style according to
 pre-commit

---
 app/tool/browser_use_tool.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index 981ef1c..d2cf2a5 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -11,6 +11,7 @@ from pydantic_core.core_schema import ValidationInfo
 
 from app.tool.base import BaseTool, ToolResult
 
+
 _BROWSER_DESCRIPTION = """
 Interact with a web browser to perform various actions such as navigation, element interaction,
 content extraction, and tab management. Supported actions include:
@@ -173,23 +174,23 @@ class BrowserUseTool(BaseTool):
                 elif action == "screenshot":
                     screenshot = await context.take_screenshot(full_page=True)
                     return ToolResult(
-                        output=
-                        f"Screenshot captured (base64 length: {len(screenshot)})",
+                        output=f"Screenshot captured (base64 length: {len(screenshot)})",
                         system=screenshot,
                     )
 
                 elif action == "get_html":
                     html = await context.get_page_html()
-                    truncated = html[:2000] + "..." if len(
-                        html) > 2000 else html
+                    truncated = html[:2000] + "..." if len(html) > 2000 else html
                     return ToolResult(output=truncated)
 
                 elif action == "get_text":
-                    text = await context.execute_javascript('document.body.innerText')
+                    text = await context.execute_javascript("document.body.innerText")
                     return ToolResult(output=text)
 
                 elif action == "read_links":
-                    links = await context.execute_javascript("document.querySelectorAll('a[href]').forEach((elem) => {if (elem.innerText) {console.log(elem.innerText, elem.href)}})")
+                    # forEach()/console.log evaluate to undefined; return "text href" lines instead.
+                    links_js = "Array.from(document.querySelectorAll('a[href]')).filter((elem) => elem.innerText).map((elem) => elem.innerText + ' ' + elem.href).join('\\n')"
+                    links = await context.execute_javascript(links_js)
                     return ToolResult(output=links)
 
                 elif action == "execute_js":