diff --git a/README.md b/README.md index ee33f75..ae93a47 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ Join our networking group on Feishu and share your experience with other develop Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) and [browser-use](https://github.com/browser-use/browser-use) for providing basic support for this project! -Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT) and [OpenHands](https://github.com/All-Hands-AI/OpenHands). +Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands) and [SWE-agent](https://github.com/SWE-agent/SWE-agent). OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community! diff --git a/README_ja.md b/README_ja.md index 668d9e3..2dd84d5 100644 --- a/README_ja.md +++ b/README_ja.md @@ -144,7 +144,7 @@ Feishuのネットワーキンググループに参加して、他の開発者 このプロジェクトの基本的なサポートを提供してくれた[anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) と[browser-use](https://github.com/browser-use/browser-use)に感謝します! -さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)にも感謝します。 +さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)、[SWE-agent](https://github.com/SWE-agent/SWE-agent)にも感謝します。 OpenManusはMetaGPTのコントリビューターによって構築されました。このエージェントコミュニティに大きな感謝を! diff --git a/README_ko.md b/README_ko.md index 5cefd84..379363e 100644 --- a/README_ko.md +++ b/README_ko.md @@ -144,7 +144,7 @@ Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공 이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와 [browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다! -또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands)에 깊은 감사를 드립니다. +또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands), [SWE-agent](https://github.com/SWE-agent/SWE-agent)에 깊은 감사를 드립니다. OpenManus는 MetaGPT 기여자들에 의해 개발되었습니다. 이 에이전트 커뮤니티에 깊은 감사를 전합니다! diff --git a/README_zh.md b/README_zh.md index 28f6749..ea7f904 100644 --- a/README_zh.md +++ b/README_zh.md @@ -119,7 +119,7 @@ python main.py 然后通过终端输入你的创意! -如需体验开发中版本,可运行: +如需体验不稳定的开发版本,可运行: ```bash python run_flow.py @@ -148,7 +148,7 @@ python run_flow.py 特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) 和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持! -此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT) 和 [OpenHands](https://github.com/All-Hands-AI/OpenHands). +此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT),[OpenHands](https://github.com/All-Hands-AI/OpenHands) 和 [SWE-agent](https://github.com/SWE-agent/SWE-agent). OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力的智能体开发者社区! diff --git a/app/agent/base.py b/app/agent/base.py index 3830365..fa3db30 100644 --- a/app/agent/base.py +++ b/app/agent/base.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, Field, model_validator from app.llm import LLM from app.logger import logger -from app.schema import AgentState, Memory, Message, ROLE_TYPE +from app.schema import ROLE_TYPE, AgentState, Memory, Message class BaseAgent(BaseModel, ABC): @@ -82,7 +82,7 @@ class BaseAgent(BaseModel, ABC): def update_memory( self, - role: ROLE_TYPE, # type: ignore + role: ROLE_TYPE, # type: ignore content: str, **kwargs, ) -> None: diff --git a/app/agent/manus.py b/app/agent/manus.py index fdf0a10..6c2c2e5 100644 --- a/app/agent/manus.py +++ b/app/agent/manus.py @@ -7,9 +7,8 @@ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT from app.tool import Terminate, ToolCollection from app.tool.browser_use_tool import BrowserUseTool from app.tool.file_saver import FileSaver -from app.tool.web_search import WebSearch from app.tool.python_execute import PythonExecute -from app.config import config +from app.tool.web_search import WebSearch class Manus(ToolCallAgent): diff --git a/app/agent/planning.py b/app/agent/planning.py index 8cc2be8..7e98912 100644 --- a/app/agent/planning.py +++ b/app/agent/planning.py @@ -6,7 +6,7 @@ from pydantic import Field, model_validator from app.agent.toolcall import ToolCallAgent from app.logger import logger from app.prompt.planning import NEXT_STEP_PROMPT, PLANNING_SYSTEM_PROMPT -from app.schema import Message, TOOL_CHOICE_TYPE, ToolCall, ToolChoice +from app.schema import TOOL_CHOICE_TYPE, Message, ToolCall, ToolChoice from app.tool import PlanningTool, Terminate, ToolCollection @@ -27,7 +27,7 @@ class PlanningAgent(ToolCallAgent): available_tools: ToolCollection = Field( default_factory=lambda: ToolCollection(PlanningTool(), Terminate()) ) - tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore + tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name]) tool_calls: List[ToolCall] = Field(default_factory=list) diff --git a/app/agent/toolcall.py b/app/agent/toolcall.py index 1f04784..ecf0bb4 100644 --- a/app/agent/toolcall.py +++ b/app/agent/toolcall.py @@ -1,13 +1,12 @@ import json - -from typing import Any, List, Literal, Optional, Union +from typing import Any, List, Optional, Union from pydantic import Field from app.agent.react import ReActAgent from app.logger import logger from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT -from app.schema import AgentState, Message, ToolCall, TOOL_CHOICE_TYPE, ToolChoice +from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice from app.tool import CreateChatCompletion, Terminate, ToolCollection @@ -26,7 +25,7 @@ class ToolCallAgent(ReActAgent): available_tools: ToolCollection = ToolCollection( CreateChatCompletion(), Terminate() ) - tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore + tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name]) tool_calls: List[ToolCall] = Field(default_factory=list) diff --git a/app/config.py b/app/config.py index 8fd8bd7..0a267d7 100644 --- a/app/config.py +++ b/app/config.py @@ -30,8 +30,10 @@ class ProxySettings(BaseModel): username: Optional[str] = Field(None, description="Proxy username") password: Optional[str] = Field(None, description="Proxy password") + class SearchSettings(BaseModel): - engine: str = Field(default='Google', description="Search engine the llm to use") + engine: str = Field(default="Google", description="Search engine the llm to use") + class BrowserSettings(BaseModel): headless: bool = Field(False, description="Whether to run browser in headless mode") @@ -180,7 +182,7 @@ class Config: @property def browser_config(self) -> Optional[BrowserSettings]: return self._config.browser_config - + @property def search_config(self) -> Optional[SearchSettings]: return self._config.search_config diff --git a/app/llm.py b/app/llm.py index 3314062..8c085ae 100644 --- a/app/llm.py +++ b/app/llm.py @@ -12,10 +12,18 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential from app.config import LLMSettings, config from app.logger import logger # Assuming a logger is set up in your app -from app.schema import Message, TOOL_CHOICE_TYPE, ROLE_VALUES, TOOL_CHOICE_VALUES, ToolChoice +from app.schema import ( + ROLE_VALUES, + TOOL_CHOICE_TYPE, + TOOL_CHOICE_VALUES, + Message, + ToolChoice, +) + REASONING_MODELS = ["o1", "o3-mini"] + class LLM: _instances: Dict[str, "LLM"] = {} @@ -140,7 +148,7 @@ class LLM: } if self.model in REASONING_MODELS: - params["max_completion_tokens"] = self.max_tokens + params["max_completion_tokens"] = self.max_tokens else: params["max_tokens"] = self.max_tokens params["temperature"] = temperature or self.temperature @@ -191,7 +199,7 @@ class LLM: system_msgs: Optional[List[Union[dict, Message]]] = None, timeout: int = 300, tools: Optional[List[dict]] = None, - tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore + tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore temperature: Optional[float] = None, **kwargs, ): diff --git a/app/schema.py b/app/schema.py index 30ccf6c..fb89c3c 100644 --- a/app/schema.py +++ b/app/schema.py @@ -3,25 +3,32 @@ from typing import Any, List, Literal, Optional, Union from pydantic import BaseModel, Field + class Role(str, Enum): """Message role options""" + SYSTEM = "system" USER = "user" - ASSISTANT = "assistant" + ASSISTANT = "assistant" TOOL = "tool" + ROLE_VALUES = tuple(role.value for role in Role) ROLE_TYPE = Literal[ROLE_VALUES] # type: ignore + class ToolChoice(str, Enum): """Tool choice options""" + NONE = "none" AUTO = "auto" REQUIRED = "required" + TOOL_CHOICE_VALUES = tuple(choice.value for choice in ToolChoice) TOOL_CHOICE_TYPE = Literal[TOOL_CHOICE_VALUES] # type: ignore + class AgentState(str, Enum): """Agent execution states""" @@ -47,7 +54,7 @@ class ToolCall(BaseModel): class Message(BaseModel): """Represents a chat message in the conversation""" - role: ROLE_TYPE = Field(...) # type: ignore + role: ROLE_TYPE = Field(...) # type: ignore content: Optional[str] = Field(default=None) tool_calls: Optional[List[ToolCall]] = Field(default=None) name: Optional[str] = Field(default=None) @@ -104,7 +111,9 @@ class Message(BaseModel): @classmethod def tool_message(cls, content: str, name, tool_call_id: str) -> "Message": """Create a tool message""" - return cls(role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id) + return cls( + role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id + ) @classmethod def from_tool_calls( diff --git a/app/tool/python_execute.py b/app/tool/python_execute.py index e9c8140..08ceffa 100644 --- a/app/tool/python_execute.py +++ b/app/tool/python_execute.py @@ -1,6 +1,6 @@ +import multiprocessing import sys from io import StringIO -import multiprocessing from typing import Dict from app.tool.base import BaseTool @@ -53,17 +53,13 @@ class PythonExecute(BaseTool): """ with multiprocessing.Manager() as manager: - result = manager.dict({ - "observation": "", - "success": False - }) + result = manager.dict({"observation": "", "success": False}) if isinstance(__builtins__, dict): safe_globals = {"__builtins__": __builtins__} else: safe_globals = {"__builtins__": __builtins__.__dict__.copy()} proc = multiprocessing.Process( - target=self._run_code, - args=(code, result, safe_globals) + target=self._run_code, args=(code, result, safe_globals) ) proc.start() proc.join(timeout) diff --git a/app/tool/search/__init__.py b/app/tool/search/__init__.py index 509d16d..4f486ac 100644 --- a/app/tool/search/__init__.py +++ b/app/tool/search/__init__.py @@ -1,5 +1,5 @@ -from app.tool.search.base import WebSearchEngine from app.tool.search.baidu_search import BaiduSearchEngine +from app.tool.search.base import WebSearchEngine from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine from app.tool.search.google_search import GoogleSearchEngine @@ -9,4 +9,4 @@ __all__ = [ "BaiduSearchEngine", "DuckDuckGoSearchEngine", "GoogleSearchEngine", -] \ No newline at end of file +] diff --git a/app/tool/search/baidu_search.py b/app/tool/search/baidu_search.py index a398899..d415ce8 100644 --- a/app/tool/search/baidu_search.py +++ b/app/tool/search/baidu_search.py @@ -1,9 +1,9 @@ from baidusearch.baidusearch import search + from app.tool.search.base import WebSearchEngine class BaiduSearchEngine(WebSearchEngine): - - def perform_search(self, query, num_results = 10, *args, **kwargs): + def perform_search(self, query, num_results=10, *args, **kwargs): """Baidu search engine.""" return search(query, num_results=num_results) diff --git a/app/tool/search/base.py b/app/tool/search/base.py index 095c0b1..3132381 100644 --- a/app/tool/search/base.py +++ b/app/tool/search/base.py @@ -1,5 +1,7 @@ class WebSearchEngine(object): - def perform_search(self, query: str, num_results: int = 10, *args, **kwargs) -> list[dict]: + def perform_search( + self, query: str, num_results: int = 10, *args, **kwargs + ) -> list[dict]: """ Perform a web search and return a list of URLs. @@ -12,4 +14,4 @@ class WebSearchEngine(object): Returns: List: A list of dict matching the search query. """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/app/tool/search/duckduckgo_search.py b/app/tool/search/duckduckgo_search.py index 738ecf5..3dd5c52 100644 --- a/app/tool/search/duckduckgo_search.py +++ b/app/tool/search/duckduckgo_search.py @@ -1,9 +1,9 @@ from duckduckgo_search import DDGS + from app.tool.search.base import WebSearchEngine class DuckDuckGoSearchEngine(WebSearchEngine): - - async def perform_search(self, query, num_results = 10, *args, **kwargs): + async def perform_search(self, query, num_results=10, *args, **kwargs): """DuckDuckGo search engine.""" return DDGS.text(query, num_results=num_results) diff --git a/app/tool/search/google_search.py b/app/tool/search/google_search.py index 606f107..425106d 100644 --- a/app/tool/search/google_search.py +++ b/app/tool/search/google_search.py @@ -1,8 +1,9 @@ -from app.tool.search.base import WebSearchEngine from googlesearch import search +from app.tool.search.base import WebSearchEngine + + class GoogleSearchEngine(WebSearchEngine): - - def perform_search(self, query, num_results = 10, *args, **kwargs): + def perform_search(self, query, num_results=10, *args, **kwargs): """Google search engine.""" return search(query, num_results=num_results) diff --git a/app/tool/terminal.py b/app/tool/terminal.py index df5996e..86b401c 100644 --- a/app/tool/terminal.py +++ b/app/tool/terminal.py @@ -40,7 +40,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that str: The output, and error of the command execution. """ # Split the command by & to handle multiple commands - commands = [cmd.strip() for cmd in command.split('&') if cmd.strip()] + commands = [cmd.strip() for cmd in command.split("&") if cmd.strip()] final_output = CLIResult(output="", error="") for cmd in commands: @@ -61,7 +61,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that stdout, stderr = await self.process.communicate() result = CLIResult( output=stdout.decode().strip(), - error=stderr.decode().strip() + error=stderr.decode().strip(), ) except Exception as e: result = CLIResult(output="", error=str(e)) @@ -70,9 +70,13 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that # Combine outputs if result.output: - final_output.output += (result.output + "\n") if final_output.output else result.output + final_output.output += ( + (result.output + "\n") if final_output.output else result.output + ) if result.error: - final_output.error += (result.error + "\n") if final_output.error else result.error + final_output.error += ( + (result.error + "\n") if final_output.error else result.error + ) # Remove trailing newlines final_output.output = final_output.output.rstrip() @@ -124,14 +128,10 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that if os.path.isdir(new_path): self.current_path = new_path return CLIResult( - output=f"Changed directory to {self.current_path}", - error="" + output=f"Changed directory to {self.current_path}", error="" ) else: - return CLIResult( - output="", - error=f"No such directory: {new_path}" - ) + return CLIResult(output="", error=f"No such directory: {new_path}") except Exception as e: return CLIResult(output="", error=str(e)) @@ -152,7 +152,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that parts = shlex.split(command) if any(cmd in dangerous_commands for cmd in parts): raise ValueError("Use of dangerous commands is restricted.") - except Exception as e: + except Exception: # If shlex.split fails, try basic string comparison if any(cmd in command for cmd in dangerous_commands): raise ValueError("Use of dangerous commands is restricted.") diff --git a/app/tool/web_search.py b/app/tool/web_search.py index c661f3b..db4ee85 100644 --- a/app/tool/web_search.py +++ b/app/tool/web_search.py @@ -1,9 +1,14 @@ import asyncio from typing import List -from app.tool.base import BaseTool from app.config import config -from app.tool.search import WebSearchEngine, BaiduSearchEngine, GoogleSearchEngine, DuckDuckGoSearchEngine +from app.tool.base import BaseTool +from app.tool.search import ( + BaiduSearchEngine, + DuckDuckGoSearchEngine, + GoogleSearchEngine, + WebSearchEngine, +) class WebSearch(BaseTool): @@ -48,7 +53,8 @@ The tool returns a list of URLs that match the search query. loop = asyncio.get_event_loop() search_engine = self.get_search_engine() links = await loop.run_in_executor( - None, lambda: list(search_engine.perform_search(query, num_results=num_results)) + None, + lambda: list(search_engine.perform_search(query, num_results=num_results)), ) return links diff --git a/config/config.example.toml b/config/config.example.toml index d6c193a..762f42c 100644 --- a/config/config.example.toml +++ b/config/config.example.toml @@ -1,10 +1,10 @@ # Global LLM configuration [llm] -model = "claude-3-5-sonnet" -base_url = "https://api.openai.com/v1" -api_key = "sk-..." -max_tokens = 4096 -temperature = 0.0 +model = "claude-3-7-sonnet" # The LLM model to use +base_url = "https://api.openai.com/v1" # API endpoint URL +api_key = "sk-..." # Your API key +max_tokens = 8192 # Maximum number of tokens in the response +temperature = 0.0 # Controls randomness # [llm] #AZURE OPENAI: # api_type= 'azure' @@ -17,9 +17,11 @@ temperature = 0.0 # Optional configuration for specific LLM models [llm.vision] -model = "claude-3-5-sonnet" -base_url = "https://api.openai.com/v1" -api_key = "sk-..." +model = "claude-3-7-sonnet" # The vision model to use +base_url = "https://api.openai.com/v1" # API endpoint URL for vision model +api_key = "sk-..." # Your API key for vision model +max_tokens = 8192 # Maximum number of tokens in the response +temperature = 0.0 # Controls randomness for vision model # Optional configuration for specific browser configuration # [browser] @@ -46,4 +48,4 @@ api_key = "sk-..." # Optional configuration, Search settings. # [search] # Search engine for agent to use. Default is "Google", can be set to "Baidu" or "DuckDuckGo". -#engine = "Google" \ No newline at end of file +#engine = "Google"