Merge branch 'main' into refactor/standardize-tool-choice-literals

This commit is contained in:
Isaac 2025-03-12 20:55:52 +08:00 committed by GitHub
commit 6b64b98b12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 167 additions and 15 deletions

View File

@ -12,8 +12,8 @@
Manus 非常棒,但 OpenManus 无需邀请码即可实现任何创意 🛫!
我们的团队成员 [@mannaandpoem](https://github.com/mannaandpoem) [@XiangJinyu](https://github.com/XiangJinyu) [@MoshiQAQ](https://github.com/MoshiQAQ) [@didiforgithub](https://github.com/didiforgithub) https://github.com/stellaHSR 来自 [@MetaGPT](https://github.com/geekan/MetaGPT) 组织,我们在 3
小时内完成了原型开发并持续迭代中!
我们的团队成员 [@Xinbin Liang](https://github.com/mannaandpoem) 和 [@Jinyu Xiang](https://github.com/XiangJinyu)(核心作者),以及 [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub) 和 [@Sirui Hong](https://github.com/stellaHSR),来自 [@MetaGPT](https://github.com/geekan/MetaGPT)团队。我们在 3
小时内完成了开发并持续迭代中!
这是一个简洁的实现方案,欢迎任何建议、贡献和反馈!

View File

@ -1,3 +1,5 @@
from typing import Any
from pydantic import Field
from app.agent.toolcall import ToolCallAgent
@ -26,6 +28,9 @@ class Manus(ToolCallAgent):
system_prompt: str = SYSTEM_PROMPT
next_step_prompt: str = NEXT_STEP_PROMPT
max_observe: int = 2000
max_steps: int = 20
# Add general-purpose tools to the tool collection
available_tools: ToolCollection = Field(
default_factory=lambda: ToolCollection(
@ -33,4 +38,6 @@ class Manus(ToolCallAgent):
)
)
max_steps: int = 20
async def _handle_special_tool(self, name: str, result: Any, **kwargs):
await self.available_tools.get_tool(BrowserUseTool().name).cleanup()
await super()._handle_special_tool(name, result, **kwargs)

View File

@ -1,5 +1,6 @@
import json
from typing import Any, List
from typing import Any, List, Literal, Optional, Union
from pydantic import Field
@ -31,6 +32,7 @@ class ToolCallAgent(ReActAgent):
tool_calls: List[ToolCall] = Field(default_factory=list)
max_steps: int = 30
max_observe: Optional[Union[int, bool]] = None
async def think(self) -> bool:
"""Process current state and decide next actions using tools"""
@ -110,6 +112,10 @@ class ToolCallAgent(ReActAgent):
results = []
for command in self.tool_calls:
result = await self.execute_tool(command)
if self.max_observe:
result = result[: self.max_observe]
logger.info(
f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}"
)

View File

@ -1,7 +1,7 @@
import threading
import tomllib
from pathlib import Path
from typing import Dict
from typing import Dict, List, Optional
from pydantic import BaseModel, Field
@ -25,8 +25,42 @@ class LLMSettings(BaseModel):
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
class ProxySettings(BaseModel):
server: str = Field(None, description="Proxy server address")
username: Optional[str] = Field(None, description="Proxy username")
password: Optional[str] = Field(None, description="Proxy password")
class BrowserSettings(BaseModel):
headless: bool = Field(False, description="Whether to run browser in headless mode")
disable_security: bool = Field(
True, description="Disable browser security features"
)
extra_chromium_args: List[str] = Field(
default_factory=list, description="Extra arguments to pass to the browser"
)
chrome_instance_path: Optional[str] = Field(
None, description="Path to a Chrome instance to use"
)
wss_url: Optional[str] = Field(
None, description="Connect to a browser instance via WebSocket"
)
cdp_url: Optional[str] = Field(
None, description="Connect to a browser instance via CDP"
)
proxy: Optional[ProxySettings] = Field(
None, description="Proxy settings for the browser"
)
class AppConfig(BaseModel):
llm: Dict[str, LLMSettings]
browser_config: Optional[BrowserSettings] = Field(
None, description="Browser configuration"
)
class Config:
arbitrary_types_allowed = True
class Config:
@ -82,6 +116,39 @@ class Config:
"api_version": base_llm.get("api_version", ""),
}
# handle browser config.
browser_config = raw_config.get("browser", {})
browser_settings = None
if browser_config:
# handle proxy settings.
proxy_config = browser_config.get("proxy", {})
proxy_settings = None
if proxy_config and proxy_config.get("server"):
proxy_settings = ProxySettings(
**{
k: v
for k, v in proxy_config.items()
if k in ["server", "username", "password"] and v
}
)
# filter valid browser config parameters.
valid_browser_params = {
k: v
for k, v in browser_config.items()
if k in BrowserSettings.__annotations__ and v is not None
}
# if there is proxy settings, add it to the parameters.
if proxy_settings:
valid_browser_params["proxy"] = proxy_settings
# only create BrowserSettings when there are valid parameters.
if valid_browser_params:
browser_settings = BrowserSettings(**valid_browser_params)
config_dict = {
"llm": {
"default": default_settings,
@ -89,7 +156,8 @@ class Config:
name: {**default_settings, **override_config}
for name, override_config in llm_overrides.items()
},
}
},
"browser_config": browser_settings,
}
self._config = AppConfig(**config_dict)
@ -98,5 +166,9 @@ class Config:
def llm(self) -> Dict[str, LLMSettings]:
return self._config.llm
@property
def browser_config(self) -> Optional[BrowserSettings]:
return self._config.browser_config
config = Config()

View File

@ -185,7 +185,7 @@ class LLM:
self,
messages: List[Union[dict, Message]],
system_msgs: Optional[List[Union[dict, Message]]] = None,
timeout: int = 60,
timeout: int = 300,
tools: Optional[List[dict]] = None,
tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore
temperature: Optional[float] = None,

View File

@ -4,13 +4,15 @@ from typing import Optional
from browser_use import Browser as BrowserUseBrowser
from browser_use import BrowserConfig
from browser_use.browser.context import BrowserContext
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.dom.service import DomService
from pydantic import Field, field_validator
from pydantic_core.core_schema import ValidationInfo
from app.config import config
from app.tool.base import BaseTool, ToolResult
MAX_LENGTH = 2000
_BROWSER_DESCRIPTION = """
@ -104,10 +106,50 @@ class BrowserUseTool(BaseTool):
async def _ensure_browser_initialized(self) -> BrowserContext:
"""Ensure browser and context are initialized."""
if self.browser is None:
self.browser = BrowserUseBrowser(BrowserConfig(headless=False))
browser_config_kwargs = {"headless": False}
if config.browser_config:
from browser_use.browser.browser import ProxySettings
# handle proxy settings.
if config.browser_config.proxy and config.browser_config.proxy.server:
browser_config_kwargs["proxy"] = ProxySettings(
server=config.browser_config.proxy.server,
username=config.browser_config.proxy.username,
password=config.browser_config.proxy.password,
)
browser_attrs = [
"headless",
"disable_security",
"extra_chromium_args",
"chrome_instance_path",
"wss_url",
"cdp_url",
]
for attr in browser_attrs:
value = getattr(config.browser_config, attr, None)
if value is not None:
if not isinstance(value, list) or value:
browser_config_kwargs[attr] = value
self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs))
if self.context is None:
self.context = await self.browser.new_context()
context_config = BrowserContextConfig()
# if there is context config in the config, use it.
if (
config.browser_config
and hasattr(config.browser_config, "new_context_config")
and config.browser_config.new_context_config
):
context_config = config.browser_config.new_context_config
self.context = await self.browser.new_context(context_config)
self.dom_service = DomService(await self.context.get_current_page())
return self.context
async def execute(
@ -181,7 +223,9 @@ class BrowserUseTool(BaseTool):
elif action == "get_html":
html = await context.get_page_html()
truncated = html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html
truncated = (
html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html
)
return ToolResult(output=truncated)
elif action == "get_text":

View File

@ -1,7 +1,8 @@
from app.tool.base import BaseTool
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task."""
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task.
When you have finished all the tasks, call this tool to end the work."""
class Terminate(BaseTool):

View File

@ -20,3 +20,25 @@ temperature = 0.0
model = "claude-3-5-sonnet"
base_url = "https://api.openai.com/v1"
api_key = "sk-..."
# Optional configuration for specific browser configuration
# [browser]
# Whether to run browser in headless mode (default: false)
#headless = false
# Disable browser security features (default: true)
#disable_security = true
# Extra arguments to pass to the browser
#extra_chromium_args = []
# Path to a Chrome instance to use to connect to your normal browser
# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
#chrome_instance_path = ""
# Connect to a browser instance via WebSocket
#wss_url = ""
# Connect to a browser instance via CDP
#cdp_url = ""
# Optional configuration, Proxy settings for the browser
# [browser.proxy]
# server = "http://proxy-server:port"
# username = "proxy-username"
# password = "proxy-password"