From 3cb4489cd5de4938c995bb10c47ff0790a19977a Mon Sep 17 00:00:00 2001 From: zhengshuli Date: Tue, 11 Mar 2025 16:51:35 +0800 Subject: [PATCH 1/5] Support configuring all BrowserConfig parameters within browser-use. --- app/config.py | 58 ++++++++++++++++++++++++++++++++++-- app/tool/browser_use_tool.py | 40 +++++++++++++++++++++++-- config/config.example.toml | 22 ++++++++++++++ 3 files changed, 116 insertions(+), 4 deletions(-) diff --git a/app/config.py b/app/config.py index 540a8ac..cfd3f25 100644 --- a/app/config.py +++ b/app/config.py @@ -1,7 +1,7 @@ import threading import tomllib from pathlib import Path -from typing import Dict +from typing import Dict, List, Optional from pydantic import BaseModel, Field @@ -25,8 +25,28 @@ class LLMSettings(BaseModel): api_version: str = Field(..., description="Azure Openai version if AzureOpenai") +class ProxySettings(BaseModel): + server: str = Field(None, description="Proxy server address") + username: Optional[str] = Field(None, description="Proxy username") + password: Optional[str] = Field(None, description="Proxy password") + + +class BrowserSettings(BaseModel): + headless: bool = Field(False, description="Whether to run browser in headless mode") + disable_security: bool = Field(True, description="Disable browser security features") + extra_chromium_args: List[str] = Field(default_factory=list, description="Extra arguments to pass to the browser") + chrome_instance_path: Optional[str] = Field(None, description="Path to a Chrome instance to use") + wss_url: Optional[str] = Field(None, description="Connect to a browser instance via WebSocket") + cdp_url: Optional[str] = Field(None, description="Connect to a browser instance via CDP") + proxy: Optional[ProxySettings] = Field(None, description="Proxy settings for the browser") + + class AppConfig(BaseModel): llm: Dict[str, LLMSettings] + browser_config: Optional[BrowserSettings] = Field(None, description="Browser configuration") + + class Config: + arbitrary_types_allowed = True class Config: @@ -82,6 +102,35 @@ class Config: "api_version": base_llm.get("api_version", ""), } + # handle browser config. + browser_config = raw_config.get("browser", {}) + browser_settings = None + + if browser_config: + # handle proxy settings. + proxy_config = browser_config.get("proxy", {}) + proxy_settings = None + + if proxy_config and proxy_config.get("server"): + proxy_settings = ProxySettings(**{ + k: v for k, v in proxy_config.items() + if k in ["server", "username", "password"] and v + }) + + # filter valid browser config parameters. + valid_browser_params = { + k: v for k, v in browser_config.items() + if k in BrowserSettings.__annotations__ and v is not None + } + + # if there is proxy settings, add it to the parameters. + if proxy_settings: + valid_browser_params["proxy"] = proxy_settings + + # only create BrowserSettings when there are valid parameters. + if valid_browser_params: + browser_settings = BrowserSettings(**valid_browser_params) + config_dict = { "llm": { "default": default_settings, @@ -89,7 +138,8 @@ class Config: name: {**default_settings, **override_config} for name, override_config in llm_overrides.items() }, - } + }, + "browser_config": browser_settings, } self._config = AppConfig(**config_dict) @@ -97,6 +147,10 @@ class Config: @property def llm(self) -> Dict[str, LLMSettings]: return self._config.llm + + @property + def browser_config(self) -> Optional[BrowserSettings]: + return self._config.browser_config config = Config() diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py index e7cf9a6..22cefee 100644 --- a/app/tool/browser_use_tool.py +++ b/app/tool/browser_use_tool.py @@ -5,10 +5,12 @@ from typing import Optional from browser_use import Browser as BrowserUseBrowser from browser_use import BrowserConfig from browser_use.browser.context import BrowserContext +from browser_use.browser.context import BrowserContextConfig from browser_use.dom.service import DomService from pydantic import Field, field_validator from pydantic_core.core_schema import ValidationInfo +from app.config import config from app.tool.base import BaseTool, ToolResult MAX_LENGTH = 2000 @@ -104,10 +106,44 @@ class BrowserUseTool(BaseTool): async def _ensure_browser_initialized(self) -> BrowserContext: """Ensure browser and context are initialized.""" if self.browser is None: - self.browser = BrowserUseBrowser(BrowserConfig(headless=False)) + browser_config_kwargs = {"headless": False} + + if config.browser_config: + from browser_use.browser.browser import ProxySettings + + # handle proxy settings. + if config.browser_config.proxy and config.browser_config.proxy.server: + browser_config_kwargs["proxy"] = ProxySettings( + server=config.browser_config.proxy.server, + username=config.browser_config.proxy.username, + password=config.browser_config.proxy.password + ) + + browser_attrs = [ + "headless", "disable_security", "extra_chromium_args", + "chrome_instance_path", "wss_url", "cdp_url" + ] + + for attr in browser_attrs: + value = getattr(config.browser_config, attr, None) + if value is not None: + if not isinstance(value, list) or value: + browser_config_kwargs[attr] = value + + self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs)) + if self.context is None: - self.context = await self.browser.new_context() + context_config = BrowserContextConfig() + + # if there is context config in the config, use it. + if (config.browser_config and + hasattr(config.browser_config, 'new_context_config') and + config.browser_config.new_context_config): + context_config = config.browser_config.new_context_config + + self.context = await self.browser.new_context(context_config) self.dom_service = DomService(await self.context.get_current_page()) + return self.context async def execute( diff --git a/config/config.example.toml b/config/config.example.toml index c1d51f0..13648dd 100644 --- a/config/config.example.toml +++ b/config/config.example.toml @@ -20,3 +20,25 @@ temperature = 0.0 model = "claude-3-5-sonnet" base_url = "https://api.openai.com/v1" api_key = "sk-..." + +# Optional configuration for specific browser configuration +# [browser] +# Whether to run browser in headless mode (default: false) +#headless = false +# Disable browser security features (default: true) +#disable_security = true +# Extra arguments to pass to the browser +#extra_chromium_args = [] +# Path to a Chrome instance to use to connect to your normal browser +# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' +#chrome_instance_path = "" +# Connect to a browser instance via WebSocket +#wss_url = "" +# Connect to a browser instance via CDP +#cdp_url = "" + +# Optional configuration, Proxy settings for the browser +# [browser.proxy] +# server = "http://proxy-server:port" +# username = "proxy-username" +# password = "proxy-password" From e6e31a2c135c88e71c2173d7882d4e266369d739 Mon Sep 17 00:00:00 2001 From: liangxinbing <1580466765@qq.com> Date: Wed, 12 Mar 2025 20:09:23 +0800 Subject: [PATCH 2/5] update timeout to 300 --- app/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/llm.py b/app/llm.py index 8f62782..7a9d847 100644 --- a/app/llm.py +++ b/app/llm.py @@ -185,7 +185,7 @@ class LLM: self, messages: List[Union[dict, Message]], system_msgs: Optional[List[Union[dict, Message]]] = None, - timeout: int = 60, + timeout: int = 300, tools: Optional[List[dict]] = None, tool_choice: Literal["none", "auto", "required"] = "auto", temperature: Optional[float] = None, From 74a4c8bef090d2bb195ef52a05f376a795e5d888 Mon Sep 17 00:00:00 2001 From: liangxinbing <1580466765@qq.com> Date: Wed, 12 Mar 2025 20:25:37 +0800 Subject: [PATCH 3/5] fix bug of abnormal exit for BrowserUseTool --- app/agent/manus.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/agent/manus.py b/app/agent/manus.py index 940472e..013a139 100644 --- a/app/agent/manus.py +++ b/app/agent/manus.py @@ -1,3 +1,5 @@ +from typing import Any + from pydantic import Field from app.agent.toolcall import ToolCallAgent @@ -34,3 +36,7 @@ class Manus(ToolCallAgent): ) max_steps: int = 20 + + async def _handle_special_tool(self, name: str, result: Any, **kwargs): + await self.available_tools.get_tool(BrowserUseTool().name).cleanup() + await super()._handle_special_tool(name, result, **kwargs) From 31c7e69faf40ec40c549c30f23ede51e5ee9283e Mon Sep 17 00:00:00 2001 From: xiangjinyu <1376193973@qq.com> Date: Wed, 12 Mar 2025 20:27:14 +0800 Subject: [PATCH 4/5] add max_steps --- app/agent/manus.py | 5 +++-- app/agent/toolcall.py | 7 ++++++- app/tool/terminate.py | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/app/agent/manus.py b/app/agent/manus.py index 940472e..db68cca 100644 --- a/app/agent/manus.py +++ b/app/agent/manus.py @@ -26,11 +26,12 @@ class Manus(ToolCallAgent): system_prompt: str = SYSTEM_PROMPT next_step_prompt: str = NEXT_STEP_PROMPT + max_observe: int = 2000 + max_steps: int = 20 + # Add general-purpose tools to the tool collection available_tools: ToolCollection = Field( default_factory=lambda: ToolCollection( PythonExecute(), GoogleSearch(), BrowserUseTool(), FileSaver(), Terminate() ) ) - - max_steps: int = 20 diff --git a/app/agent/toolcall.py b/app/agent/toolcall.py index b3b6439..8d6930f 100644 --- a/app/agent/toolcall.py +++ b/app/agent/toolcall.py @@ -1,5 +1,5 @@ import json -from typing import Any, List, Literal +from typing import Any, List, Literal, Optional, Union from pydantic import Field @@ -31,6 +31,7 @@ class ToolCallAgent(ReActAgent): tool_calls: List[ToolCall] = Field(default_factory=list) max_steps: int = 30 + max_observe: Optional[Union[int, bool]] = None async def think(self) -> bool: """Process current state and decide next actions using tools""" @@ -110,6 +111,10 @@ class ToolCallAgent(ReActAgent): results = [] for command in self.tool_calls: result = await self.execute_tool(command) + + if self.max_observe: + result = result[: self.max_observe] + logger.info( f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}" ) diff --git a/app/tool/terminate.py b/app/tool/terminate.py index 2c8e783..8c2d82c 100644 --- a/app/tool/terminate.py +++ b/app/tool/terminate.py @@ -1,7 +1,8 @@ from app.tool.base import BaseTool -_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task.""" +_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task. +When you have finished all the tasks, call this tool to end the work.""" class Terminate(BaseTool): From c6cd296108faa4867f21e3b1037715a4d271ac11 Mon Sep 17 00:00:00 2001 From: xiangjinyu <1376193973@qq.com> Date: Wed, 12 Mar 2025 20:52:45 +0800 Subject: [PATCH 5/5] add pre-commit --- README_ko.md | 2 +- README_zh.md | 6 ++-- app/config.py | 54 ++++++++++++++++++++++++------------ app/prompt/manus.py | 2 +- app/tool/browser_use_tool.py | 44 +++++++++++++++++------------ 5 files changed, 67 insertions(+), 41 deletions(-) diff --git a/README_ko.md b/README_ko.md index ae0778e..7f82ec3 100644 --- a/README_ko.md +++ b/README_ko.md @@ -141,7 +141,7 @@ Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공 ## 감사의 글 -이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와 +이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와 [browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다! 또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands)에 깊은 감사를 드립니다. diff --git a/README_zh.md b/README_zh.md index fd5526e..355e9dc 100644 --- a/README_zh.md +++ b/README_zh.md @@ -12,8 +12,8 @@ Manus 非常棒,但 OpenManus 无需邀请码即可实现任何创意 🛫! -我们的团队成员 [@mannaandpoem](https://github.com/mannaandpoem) [@XiangJinyu](https://github.com/XiangJinyu) [@MoshiQAQ](https://github.com/MoshiQAQ) [@didiforgithub](https://github.com/didiforgithub) https://github.com/stellaHSR 来自 [@MetaGPT](https://github.com/geekan/MetaGPT) 组织,我们在 3 -小时内完成了原型开发并持续迭代中! +我们的团队成员 [@Xinbin Liang](https://github.com/mannaandpoem) 和 [@Jinyu Xiang](https://github.com/XiangJinyu)(核心作者),以及 [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub) 和 [@Sirui Hong](https://github.com/stellaHSR),来自 [@MetaGPT](https://github.com/geekan/MetaGPT)团队。我们在 3 +小时内完成了开发并持续迭代中! 这是一个简洁的实现方案,欢迎任何建议、贡献和反馈! @@ -163,4 +163,4 @@ OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力 journal = {GitHub repository}, howpublished = {\url{https://github.com/mannaandpoem/OpenManus}}, } -``` \ No newline at end of file +``` diff --git a/app/config.py b/app/config.py index cfd3f25..64f478d 100644 --- a/app/config.py +++ b/app/config.py @@ -33,17 +33,31 @@ class ProxySettings(BaseModel): class BrowserSettings(BaseModel): headless: bool = Field(False, description="Whether to run browser in headless mode") - disable_security: bool = Field(True, description="Disable browser security features") - extra_chromium_args: List[str] = Field(default_factory=list, description="Extra arguments to pass to the browser") - chrome_instance_path: Optional[str] = Field(None, description="Path to a Chrome instance to use") - wss_url: Optional[str] = Field(None, description="Connect to a browser instance via WebSocket") - cdp_url: Optional[str] = Field(None, description="Connect to a browser instance via CDP") - proxy: Optional[ProxySettings] = Field(None, description="Proxy settings for the browser") + disable_security: bool = Field( + True, description="Disable browser security features" + ) + extra_chromium_args: List[str] = Field( + default_factory=list, description="Extra arguments to pass to the browser" + ) + chrome_instance_path: Optional[str] = Field( + None, description="Path to a Chrome instance to use" + ) + wss_url: Optional[str] = Field( + None, description="Connect to a browser instance via WebSocket" + ) + cdp_url: Optional[str] = Field( + None, description="Connect to a browser instance via CDP" + ) + proxy: Optional[ProxySettings] = Field( + None, description="Proxy settings for the browser" + ) class AppConfig(BaseModel): llm: Dict[str, LLMSettings] - browser_config: Optional[BrowserSettings] = Field(None, description="Browser configuration") + browser_config: Optional[BrowserSettings] = Field( + None, description="Browser configuration" + ) class Config: arbitrary_types_allowed = True @@ -105,28 +119,32 @@ class Config: # handle browser config. browser_config = raw_config.get("browser", {}) browser_settings = None - + if browser_config: # handle proxy settings. proxy_config = browser_config.get("proxy", {}) proxy_settings = None - + if proxy_config and proxy_config.get("server"): - proxy_settings = ProxySettings(**{ - k: v for k, v in proxy_config.items() - if k in ["server", "username", "password"] and v - }) - + proxy_settings = ProxySettings( + **{ + k: v + for k, v in proxy_config.items() + if k in ["server", "username", "password"] and v + } + ) + # filter valid browser config parameters. valid_browser_params = { - k: v for k, v in browser_config.items() + k: v + for k, v in browser_config.items() if k in BrowserSettings.__annotations__ and v is not None } - + # if there is proxy settings, add it to the parameters. if proxy_settings: valid_browser_params["proxy"] = proxy_settings - + # only create BrowserSettings when there are valid parameters. if valid_browser_params: browser_settings = BrowserSettings(**valid_browser_params) @@ -147,7 +165,7 @@ class Config: @property def llm(self) -> Dict[str, LLMSettings]: return self._config.llm - + @property def browser_config(self) -> Optional[BrowserSettings]: return self._config.browser_config diff --git a/app/prompt/manus.py b/app/prompt/manus.py index c0be47d..e46c793 100644 --- a/app/prompt/manus.py +++ b/app/prompt/manus.py @@ -15,4 +15,4 @@ Terminate: End the current interaction when the task is complete or when you nee Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps. Always maintain a helpful, informative tone throughout the interaction. If you encounter any limitations or need more details, clearly communicate this to the user before terminating. -""" \ No newline at end of file +""" diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py index 22cefee..57ad03c 100644 --- a/app/tool/browser_use_tool.py +++ b/app/tool/browser_use_tool.py @@ -4,8 +4,7 @@ from typing import Optional from browser_use import Browser as BrowserUseBrowser from browser_use import BrowserConfig -from browser_use.browser.context import BrowserContext -from browser_use.browser.context import BrowserContextConfig +from browser_use.browser.context import BrowserContext, BrowserContextConfig from browser_use.dom.service import DomService from pydantic import Field, field_validator from pydantic_core.core_schema import ValidationInfo @@ -13,6 +12,7 @@ from pydantic_core.core_schema import ValidationInfo from app.config import config from app.tool.base import BaseTool, ToolResult + MAX_LENGTH = 2000 _BROWSER_DESCRIPTION = """ @@ -107,43 +107,49 @@ class BrowserUseTool(BaseTool): """Ensure browser and context are initialized.""" if self.browser is None: browser_config_kwargs = {"headless": False} - + if config.browser_config: from browser_use.browser.browser import ProxySettings - + # handle proxy settings. if config.browser_config.proxy and config.browser_config.proxy.server: browser_config_kwargs["proxy"] = ProxySettings( server=config.browser_config.proxy.server, username=config.browser_config.proxy.username, - password=config.browser_config.proxy.password + password=config.browser_config.proxy.password, ) - + browser_attrs = [ - "headless", "disable_security", "extra_chromium_args", - "chrome_instance_path", "wss_url", "cdp_url" + "headless", + "disable_security", + "extra_chromium_args", + "chrome_instance_path", + "wss_url", + "cdp_url", ] - + for attr in browser_attrs: value = getattr(config.browser_config, attr, None) if value is not None: if not isinstance(value, list) or value: browser_config_kwargs[attr] = value - + self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs)) - + if self.context is None: context_config = BrowserContextConfig() - + # if there is context config in the config, use it. - if (config.browser_config and - hasattr(config.browser_config, 'new_context_config') and - config.browser_config.new_context_config): + if ( + config.browser_config + and hasattr(config.browser_config, "new_context_config") + and config.browser_config.new_context_config + ): context_config = config.browser_config.new_context_config - + self.context = await self.browser.new_context(context_config) self.dom_service = DomService(await self.context.get_current_page()) - + return self.context async def execute( @@ -217,7 +223,9 @@ class BrowserUseTool(BaseTool): elif action == "get_html": html = await context.get_page_html() - truncated = html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html + truncated = ( + html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html + ) return ToolResult(output=truncated) elif action == "get_text":