Merge branch 'main' into refactor/standardize-tool-choice-literals
This commit is contained in:
commit
6b64b98b12
@ -12,8 +12,8 @@
|
||||
|
||||
Manus 非常棒,但 OpenManus 无需邀请码即可实现任何创意 🛫!
|
||||
|
||||
我们的团队成员 [@mannaandpoem](https://github.com/mannaandpoem) [@XiangJinyu](https://github.com/XiangJinyu) [@MoshiQAQ](https://github.com/MoshiQAQ) [@didiforgithub](https://github.com/didiforgithub) https://github.com/stellaHSR 来自 [@MetaGPT](https://github.com/geekan/MetaGPT) 组织,我们在 3
|
||||
小时内完成了原型开发并持续迭代中!
|
||||
我们的团队成员 [@Xinbin Liang](https://github.com/mannaandpoem) 和 [@Jinyu Xiang](https://github.com/XiangJinyu)(核心作者),以及 [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub) 和 [@Sirui Hong](https://github.com/stellaHSR),来自 [@MetaGPT](https://github.com/geekan/MetaGPT)团队。我们在 3
|
||||
小时内完成了开发并持续迭代中!
|
||||
|
||||
这是一个简洁的实现方案,欢迎任何建议、贡献和反馈!
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
from typing import Any
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from app.agent.toolcall import ToolCallAgent
|
||||
@ -26,6 +28,9 @@ class Manus(ToolCallAgent):
|
||||
system_prompt: str = SYSTEM_PROMPT
|
||||
next_step_prompt: str = NEXT_STEP_PROMPT
|
||||
|
||||
max_observe: int = 2000
|
||||
max_steps: int = 20
|
||||
|
||||
# Add general-purpose tools to the tool collection
|
||||
available_tools: ToolCollection = Field(
|
||||
default_factory=lambda: ToolCollection(
|
||||
@ -33,4 +38,6 @@ class Manus(ToolCallAgent):
|
||||
)
|
||||
)
|
||||
|
||||
max_steps: int = 20
|
||||
async def _handle_special_tool(self, name: str, result: Any, **kwargs):
    """Clean up the browser tool, then defer to the parent implementation.

    The tool registered under ``BrowserUseTool().name`` is fetched from
    ``self.available_tools`` and its ``cleanup()`` coroutine is awaited
    before ``super()._handle_special_tool`` is awaited with the same
    ``name``, ``result`` and keyword arguments.
    """
    browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
    await browser_tool.cleanup()

    parent_handler = super()._handle_special_tool
    await parent_handler(name, result, **kwargs)
|
||||
|
@ -1,5 +1,6 @@
|
||||
import json
|
||||
from typing import Any, List
|
||||
|
||||
from typing import Any, List, Literal, Optional, Union
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
@ -31,6 +32,7 @@ class ToolCallAgent(ReActAgent):
|
||||
tool_calls: List[ToolCall] = Field(default_factory=list)
|
||||
|
||||
max_steps: int = 30
|
||||
max_observe: Optional[Union[int, bool]] = None
|
||||
|
||||
async def think(self) -> bool:
|
||||
"""Process current state and decide next actions using tools"""
|
||||
@ -110,6 +112,10 @@ class ToolCallAgent(ReActAgent):
|
||||
results = []
|
||||
for command in self.tool_calls:
|
||||
result = await self.execute_tool(command)
|
||||
|
||||
if self.max_observe:
|
||||
result = result[: self.max_observe]
|
||||
|
||||
logger.info(
|
||||
f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}"
|
||||
)
|
||||
|
@ -1,7 +1,7 @@
|
||||
import threading
|
||||
import tomllib
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -25,8 +25,42 @@ class LLMSettings(BaseModel):
|
||||
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
|
||||
|
||||
|
||||
class ProxySettings(BaseModel):
    # Proxy connection settings for the browser; every field defaults to None.

    # The declared default is None, so the annotation has to admit None:
    # with a plain ``str`` annotation an explicit ``server=None`` is rejected
    # by validation even though it equals the default.
    server: Optional[str] = Field(None, description="Proxy server address")
    username: Optional[str] = Field(None, description="Proxy username")
    password: Optional[str] = Field(None, description="Proxy password")
|
||||
|
||||
|
||||
class BrowserSettings(BaseModel):
    # Browser options read from the configuration; every field has a default,
    # so the model can be built from any subset of these keys.

    # Launch flags.
    headless: bool = Field(
        default=False,
        description="Whether to run browser in headless mode",
    )
    disable_security: bool = Field(
        default=True,
        description="Disable browser security features",
    )
    extra_chromium_args: List[str] = Field(
        default_factory=list,
        description="Extra arguments to pass to the browser",
    )

    # Ways of attaching to an existing browser.
    chrome_instance_path: Optional[str] = Field(
        default=None,
        description="Path to a Chrome instance to use",
    )
    wss_url: Optional[str] = Field(
        default=None,
        description="Connect to a browser instance via WebSocket",
    )
    cdp_url: Optional[str] = Field(
        default=None,
        description="Connect to a browser instance via CDP",
    )

    # Optional nested proxy settings.
    proxy: Optional[ProxySettings] = Field(
        default=None,
        description="Proxy settings for the browser",
    )
|
||||
|
||||
|
||||
class AppConfig(BaseModel):
    # Validated application configuration: LLM settings keyed by name, plus
    # an optional browser section.

    llm: Dict[str, LLMSettings]
    browser_config: Optional[BrowserSettings] = Field(
        default=None,
        description="Browser configuration",
    )

    class Config:
        # Model-level option for this pydantic model.
        arbitrary_types_allowed = True
|
||||
|
||||
|
||||
class Config:
|
||||
@ -82,6 +116,39 @@ class Config:
|
||||
"api_version": base_llm.get("api_version", ""),
|
||||
}
|
||||
|
||||
# handle browser config.
|
||||
browser_config = raw_config.get("browser", {})
|
||||
browser_settings = None
|
||||
|
||||
if browser_config:
|
||||
# handle proxy settings.
|
||||
proxy_config = browser_config.get("proxy", {})
|
||||
proxy_settings = None
|
||||
|
||||
if proxy_config and proxy_config.get("server"):
|
||||
proxy_settings = ProxySettings(
|
||||
**{
|
||||
k: v
|
||||
for k, v in proxy_config.items()
|
||||
if k in ["server", "username", "password"] and v
|
||||
}
|
||||
)
|
||||
|
||||
# filter valid browser config parameters.
|
||||
valid_browser_params = {
|
||||
k: v
|
||||
for k, v in browser_config.items()
|
||||
if k in BrowserSettings.__annotations__ and v is not None
|
||||
}
|
||||
|
||||
# if there is proxy settings, add it to the parameters.
|
||||
if proxy_settings:
|
||||
valid_browser_params["proxy"] = proxy_settings
|
||||
|
||||
# only create BrowserSettings when there are valid parameters.
|
||||
if valid_browser_params:
|
||||
browser_settings = BrowserSettings(**valid_browser_params)
|
||||
|
||||
config_dict = {
|
||||
"llm": {
|
||||
"default": default_settings,
|
||||
@ -89,7 +156,8 @@ class Config:
|
||||
name: {**default_settings, **override_config}
|
||||
for name, override_config in llm_overrides.items()
|
||||
},
|
||||
}
|
||||
},
|
||||
"browser_config": browser_settings,
|
||||
}
|
||||
|
||||
self._config = AppConfig(**config_dict)
|
||||
@ -98,5 +166,9 @@ class Config:
|
||||
def llm(self) -> Dict[str, LLMSettings]:
|
||||
return self._config.llm
|
||||
|
||||
@property
def browser_config(self) -> Optional[BrowserSettings]:
    """Return the ``browser_config`` value held by the loaded ``AppConfig``."""
    loaded = self._config
    return loaded.browser_config
|
||||
|
||||
|
||||
config = Config()
|
||||
|
@ -185,7 +185,7 @@ class LLM:
|
||||
self,
|
||||
messages: List[Union[dict, Message]],
|
||||
system_msgs: Optional[List[Union[dict, Message]]] = None,
|
||||
timeout: int = 60,
|
||||
timeout: int = 300,
|
||||
tools: Optional[List[dict]] = None,
|
||||
tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore
|
||||
temperature: Optional[float] = None,
|
||||
|
@ -4,13 +4,15 @@ from typing import Optional
|
||||
|
||||
from browser_use import Browser as BrowserUseBrowser
|
||||
from browser_use import BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.dom.service import DomService
|
||||
from pydantic import Field, field_validator
|
||||
from pydantic_core.core_schema import ValidationInfo
|
||||
|
||||
from app.config import config
|
||||
from app.tool.base import BaseTool, ToolResult
|
||||
|
||||
|
||||
MAX_LENGTH = 2000
|
||||
|
||||
_BROWSER_DESCRIPTION = """
|
||||
@ -104,10 +106,50 @@ class BrowserUseTool(BaseTool):
|
||||
async def _ensure_browser_initialized(self) -> BrowserContext:
    """Create the browser and its context on first use and return the context.

    The browser is built once from ``config.browser_config``; when no browser
    section is configured only ``headless=False`` is passed. The context and
    the DOM service are then created once on top of that browser.

    Returns:
        The browser context stored on ``self.context``.
    """
    settings = config.browser_config

    if self.browser is None:
        # Collect the BrowserConfig arguments first and construct the browser
        # exactly once. Creating a default browser up front and then
        # overwriting ``self.browser`` would drop the first instance without
        # ever cleaning it up.
        browser_config_kwargs = {"headless": False}

        if settings:
            from browser_use.browser.browser import ProxySettings

            # Forward the proxy only when a server address is configured.
            proxy = settings.proxy
            if proxy and proxy.server:
                browser_config_kwargs["proxy"] = ProxySettings(
                    server=proxy.server,
                    username=proxy.username,
                    password=proxy.password,
                )

            browser_attrs = (
                "headless",
                "disable_security",
                "extra_chromium_args",
                "chrome_instance_path",
                "wss_url",
                "cdp_url",
            )
            for attr in browser_attrs:
                value = getattr(settings, attr, None)
                # Unset values and empty lists are not passed on.
                if value is None or (isinstance(value, list) and not value):
                    continue
                browser_config_kwargs[attr] = value

        self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs))

    if self.context is None:
        # Prefer a context config supplied by the settings object, if it has
        # one; otherwise use a default BrowserContextConfig. The context is
        # created a single time, with that config.
        context_config = (
            getattr(settings, "new_context_config", None) or BrowserContextConfig()
        )
        self.context = await self.browser.new_context(context_config)
        self.dom_service = DomService(await self.context.get_current_page())

    return self.context
|
||||
|
||||
async def execute(
|
||||
@ -181,7 +223,9 @@ class BrowserUseTool(BaseTool):
|
||||
|
||||
elif action == "get_html":
|
||||
html = await context.get_page_html()
|
||||
truncated = html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html
|
||||
truncated = (
|
||||
html[:MAX_LENGTH] + "..." if len(html) > MAX_LENGTH else html
|
||||
)
|
||||
return ToolResult(output=truncated)
|
||||
|
||||
elif action == "get_text":
|
||||
|
@ -1,7 +1,8 @@
|
||||
from app.tool.base import BaseTool
|
||||
|
||||
|
||||
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task."""
|
||||
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task.
|
||||
When you have finished all the tasks, call this tool to end the work."""
|
||||
|
||||
|
||||
class Terminate(BaseTool):
|
||||
|
@ -20,3 +20,25 @@ temperature = 0.0
|
||||
model = "claude-3-5-sonnet"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..."
|
||||
|
||||
# Optional browser configuration
|
||||
# [browser]
|
||||
# Whether to run browser in headless mode (default: false)
|
||||
#headless = false
|
||||
# Disable browser security features (default: true)
|
||||
#disable_security = true
|
||||
# Extra arguments to pass to the browser
|
||||
#extra_chromium_args = []
|
||||
# Path to a Chrome instance to use to connect to your normal browser
|
||||
# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
|
||||
#chrome_instance_path = ""
|
||||
# Connect to a browser instance via WebSocket
|
||||
#wss_url = ""
|
||||
# Connect to a browser instance via CDP
|
||||
#cdp_url = ""
|
||||
|
||||
# Optional proxy settings for the browser
|
||||
# [browser.proxy]
|
||||
# server = "http://proxy-server:port"
|
||||
# username = "proxy-username"
|
||||
# password = "proxy-password"
|
||||
|
Loading…
x
Reference in New Issue
Block a user