From b7dcbfecb3f38fb7aa87453acde557209f35d653 Mon Sep 17 00:00:00 2001
From: liangxinbing <1580466765@qq.com>
Date: Wed, 19 Mar 2025 13:27:08 +0800
Subject: [PATCH] Update BrowserUseTool extract_content action to use required function calling

---
 app/tool/browser_use_tool.py | 62 ++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index 7817aef..7fe8f16 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -418,17 +418,7 @@ class BrowserUseTool(BaseTool, Generic[Context]):
 
                         # Create prompt for LLM
                         prompt_text = """
-Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page.
-
-Examples of extraction goals:
-- Extract all company names
-- Extract specific descriptions
-- Extract all information about a topic
-- Extract links with companies in structured format
-- Extract all links
-
-If the goal is vague, summarize the page. Respond in JSON format.
-
+Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format.
 Extraction goal: {goal}
 
 Page content:
@@ -445,10 +435,54 @@ Page content:
 
                         messages = [Message.user_message(formatted_prompt)]
 
-                        # Use LLM to extract content based on the goal
-                        response = await self.llm.ask(messages)
+                        # Define extraction function for the tool
+                        extraction_function = {
+                            "type": "function",
+                            "function": {
+                                "name": "extract_content",
+                                "description": "Extract specific information from a webpage based on a goal",
+                                "parameters": {
+                                    "type": "object",
+                                    "properties": {
+                                        "extracted_content": {
+                                            "type": "object",
+                                            "description": "The content extracted from the page according to the goal",
+                                        }
+                                    },
+                                    "required": ["extracted_content"],
+                                },
+                            },
+                        }
+
+                        # Use LLM to extract content with required function calling
+                        response = await self.llm.ask_tool(
+                            messages,
+                            tools=[extraction_function],
+                            tool_choice="required",
+                        )
+
+                        # Extract content from function call response
+                        if (
+                            response
+                            and response.tool_calls
+                            and len(response.tool_calls) > 0
+                        ):
+                            # Get the first tool call arguments
+                            tool_call = response.tool_calls[0]
+                            # Parse the JSON arguments
+                            try:
+                                args = json.loads(tool_call.function.arguments)
+                                extracted_content = args.get("extracted_content", {})
+                                # Format extracted content as JSON string
+                                content_json = json.dumps(
+                                    extracted_content, indent=2, ensure_ascii=False
+                                )
+                                msg = f"Extracted from page:\n{content_json}\n"
+                            except Exception as e:
+                                msg = f"Error parsing extraction result: {str(e)}\nRaw response: {tool_call.function.arguments}"
+                        else:
+                            msg = "No content was extracted from the page."
 
-                        msg = f"Extracted from page:\n{response}\n"
                         return ToolResult(output=msg)
                     except Exception as e:
                         # Provide a more helpful error message