update readme; format code; update config.example.toml

This commit is contained in:
liangxinbing 2025-03-14 12:20:59 +08:00
parent e844dfca34
commit 9c7834eff2
20 changed files with 88 additions and 64 deletions

View File

@ -143,7 +143,7 @@ Join our networking group on Feishu and share your experience with other develop
Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
and [browser-use](https://github.com/browser-use/browser-use) for providing basic support for this project!
Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT) and [OpenHands](https://github.com/All-Hands-AI/OpenHands).
Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands) and [SWE-agent](https://github.com/SWE-agent/SWE-agent).
OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community!

View File

@ -144,7 +144,7 @@ Feishuのネットワーキンググループに参加して、他の開発者
このプロジェクトの基本的なサポートを提供してくれた[anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
と[browser-use](https://github.com/browser-use/browser-use)に感謝します!
さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)にも感謝します。
さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)、[SWE-agent](https://github.com/SWE-agent/SWE-agent)にも感謝します。
OpenManusはMetaGPTのコントリビューターによって構築されました。このエージェントコミュニティに大きな感謝を

View File

@ -144,7 +144,7 @@ Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공
이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와
[browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다!
또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands)에 깊은 감사를 드립니다.
또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands), [SWE-agent](https://github.com/SWE-agent/SWE-agent)에 깊은 감사를 드립니다.
OpenManus는 MetaGPT 기여자들에 의해 개발되었습니다. 이 에이전트 커뮤니티에 깊은 감사를 전합니다!

View File

@ -119,7 +119,7 @@ python main.py
然后通过终端输入你的创意!
如需体验开发版本,可运行:
如需体验不稳定的开发版本,可运行:
```bash
python run_flow.py
@ -148,7 +148,7 @@ python run_flow.py
特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持!
此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge)[MetaGPT](https://github.com/geekan/MetaGPT)[OpenHands](https://github.com/All-Hands-AI/OpenHands).
此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands) 和 [SWE-agent](https://github.com/SWE-agent/SWE-agent)。
OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力的智能体开发者社区!

View File

@ -6,7 +6,7 @@ from pydantic import BaseModel, Field, model_validator
from app.llm import LLM
from app.logger import logger
from app.schema import AgentState, Memory, Message, ROLE_TYPE
from app.schema import ROLE_TYPE, AgentState, Memory, Message
class BaseAgent(BaseModel, ABC):

View File

@ -7,9 +7,8 @@ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.tool import Terminate, ToolCollection
from app.tool.browser_use_tool import BrowserUseTool
from app.tool.file_saver import FileSaver
from app.tool.web_search import WebSearch
from app.tool.python_execute import PythonExecute
from app.config import config
from app.tool.web_search import WebSearch
class Manus(ToolCallAgent):

View File

@ -6,7 +6,7 @@ from pydantic import Field, model_validator
from app.agent.toolcall import ToolCallAgent
from app.logger import logger
from app.prompt.planning import NEXT_STEP_PROMPT, PLANNING_SYSTEM_PROMPT
from app.schema import Message, TOOL_CHOICE_TYPE, ToolCall, ToolChoice
from app.schema import TOOL_CHOICE_TYPE, Message, ToolCall, ToolChoice
from app.tool import PlanningTool, Terminate, ToolCollection

View File

@ -1,13 +1,12 @@
import json
from typing import Any, List, Literal, Optional, Union
from typing import Any, List, Optional, Union
from pydantic import Field
from app.agent.react import ReActAgent
from app.logger import logger
from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.schema import AgentState, Message, ToolCall, TOOL_CHOICE_TYPE, ToolChoice
from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice
from app.tool import CreateChatCompletion, Terminate, ToolCollection

View File

@ -30,8 +30,10 @@ class ProxySettings(BaseModel):
username: Optional[str] = Field(None, description="Proxy username")
password: Optional[str] = Field(None, description="Proxy password")
class SearchSettings(BaseModel):
engine: str = Field(default='Google', description="Search engine the llm to use")
engine: str = Field(default="Google", description="Search engine the llm to use")
class BrowserSettings(BaseModel):
headless: bool = Field(False, description="Whether to run browser in headless mode")

View File

@ -12,10 +12,18 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential
from app.config import LLMSettings, config
from app.logger import logger # Assuming a logger is set up in your app
from app.schema import Message, TOOL_CHOICE_TYPE, ROLE_VALUES, TOOL_CHOICE_VALUES, ToolChoice
from app.schema import (
ROLE_VALUES,
TOOL_CHOICE_TYPE,
TOOL_CHOICE_VALUES,
Message,
ToolChoice,
)
REASONING_MODELS = ["o1", "o3-mini"]
class LLM:
_instances: Dict[str, "LLM"] = {}

View File

@ -3,25 +3,32 @@ from typing import Any, List, Literal, Optional, Union
from pydantic import BaseModel, Field
class Role(str, Enum):
"""Message role options"""
SYSTEM = "system"
USER = "user"
ASSISTANT = "assistant"
TOOL = "tool"
ROLE_VALUES = tuple(role.value for role in Role)
ROLE_TYPE = Literal[ROLE_VALUES] # type: ignore
class ToolChoice(str, Enum):
"""Tool choice options"""
NONE = "none"
AUTO = "auto"
REQUIRED = "required"
TOOL_CHOICE_VALUES = tuple(choice.value for choice in ToolChoice)
TOOL_CHOICE_TYPE = Literal[TOOL_CHOICE_VALUES] # type: ignore
class AgentState(str, Enum):
"""Agent execution states"""
@ -104,7 +111,9 @@ class Message(BaseModel):
@classmethod
def tool_message(cls, content: str, name, tool_call_id: str) -> "Message":
"""Create a tool message"""
return cls(role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id)
return cls(
role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id
)
@classmethod
def from_tool_calls(

View File

@ -1,6 +1,6 @@
import multiprocessing
import sys
from io import StringIO
import multiprocessing
from typing import Dict
from app.tool.base import BaseTool
@ -53,17 +53,13 @@ class PythonExecute(BaseTool):
"""
with multiprocessing.Manager() as manager:
result = manager.dict({
"observation": "",
"success": False
})
result = manager.dict({"observation": "", "success": False})
if isinstance(__builtins__, dict):
safe_globals = {"__builtins__": __builtins__}
else:
safe_globals = {"__builtins__": __builtins__.__dict__.copy()}
proc = multiprocessing.Process(
target=self._run_code,
args=(code, result, safe_globals)
target=self._run_code, args=(code, result, safe_globals)
)
proc.start()
proc.join(timeout)

View File

@ -1,5 +1,5 @@
from app.tool.search.base import WebSearchEngine
from app.tool.search.baidu_search import BaiduSearchEngine
from app.tool.search.base import WebSearchEngine
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
from app.tool.search.google_search import GoogleSearchEngine

View File

@ -1,9 +1,9 @@
from baidusearch.baidusearch import search
from app.tool.search.base import WebSearchEngine
class BaiduSearchEngine(WebSearchEngine):
def perform_search(self, query, num_results = 10, *args, **kwargs):
def perform_search(self, query, num_results=10, *args, **kwargs):
"""Baidu search engine."""
return search(query, num_results=num_results)

View File

@ -1,5 +1,7 @@
class WebSearchEngine(object):
def perform_search(self, query: str, num_results: int = 10, *args, **kwargs) -> list[dict]:
def perform_search(
self, query: str, num_results: int = 10, *args, **kwargs
) -> list[dict]:
"""
Perform a web search and return a list of URLs.

View File

@ -1,9 +1,9 @@
from duckduckgo_search import DDGS
from app.tool.search.base import WebSearchEngine
class DuckDuckGoSearchEngine(WebSearchEngine):
async def perform_search(self, query, num_results = 10, *args, **kwargs):
async def perform_search(self, query, num_results=10, *args, **kwargs):
"""DuckDuckGo search engine."""
return DDGS.text(query, num_results=num_results)

View File

@ -1,8 +1,9 @@
from app.tool.search.base import WebSearchEngine
from googlesearch import search
class GoogleSearchEngine(WebSearchEngine):
from app.tool.search.base import WebSearchEngine
def perform_search(self, query, num_results = 10, *args, **kwargs):
class GoogleSearchEngine(WebSearchEngine):
def perform_search(self, query, num_results=10, *args, **kwargs):
"""Google search engine."""
return search(query, num_results=num_results)

View File

@ -40,7 +40,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that
str: The output, and error of the command execution.
"""
# Split the command by & to handle multiple commands
commands = [cmd.strip() for cmd in command.split('&') if cmd.strip()]
commands = [cmd.strip() for cmd in command.split("&") if cmd.strip()]
final_output = CLIResult(output="", error="")
for cmd in commands:
@ -61,7 +61,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that
stdout, stderr = await self.process.communicate()
result = CLIResult(
output=stdout.decode().strip(),
error=stderr.decode().strip()
error=stderr.decode().strip(),
)
except Exception as e:
result = CLIResult(output="", error=str(e))
@ -70,9 +70,13 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that
# Combine outputs
if result.output:
final_output.output += (result.output + "\n") if final_output.output else result.output
final_output.output += (
(result.output + "\n") if final_output.output else result.output
)
if result.error:
final_output.error += (result.error + "\n") if final_output.error else result.error
final_output.error += (
(result.error + "\n") if final_output.error else result.error
)
# Remove trailing newlines
final_output.output = final_output.output.rstrip()
@ -124,14 +128,10 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that
if os.path.isdir(new_path):
self.current_path = new_path
return CLIResult(
output=f"Changed directory to {self.current_path}",
error=""
output=f"Changed directory to {self.current_path}", error=""
)
else:
return CLIResult(
output="",
error=f"No such directory: {new_path}"
)
return CLIResult(output="", error=f"No such directory: {new_path}")
except Exception as e:
return CLIResult(output="", error=str(e))
@ -152,7 +152,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that
parts = shlex.split(command)
if any(cmd in dangerous_commands for cmd in parts):
raise ValueError("Use of dangerous commands is restricted.")
except Exception as e:
except Exception:
# If shlex.split fails, try basic string comparison
if any(cmd in command for cmd in dangerous_commands):
raise ValueError("Use of dangerous commands is restricted.")

View File

@ -1,9 +1,14 @@
import asyncio
from typing import List
from app.tool.base import BaseTool
from app.config import config
from app.tool.search import WebSearchEngine, BaiduSearchEngine, GoogleSearchEngine, DuckDuckGoSearchEngine
from app.tool.base import BaseTool
from app.tool.search import (
BaiduSearchEngine,
DuckDuckGoSearchEngine,
GoogleSearchEngine,
WebSearchEngine,
)
class WebSearch(BaseTool):
@ -48,7 +53,8 @@ The tool returns a list of URLs that match the search query.
loop = asyncio.get_event_loop()
search_engine = self.get_search_engine()
links = await loop.run_in_executor(
None, lambda: list(search_engine.perform_search(query, num_results=num_results))
None,
lambda: list(search_engine.perform_search(query, num_results=num_results)),
)
return links

View File

@ -1,10 +1,10 @@
# Global LLM configuration
[llm]
model = "claude-3-5-sonnet"
base_url = "https://api.openai.com/v1"
api_key = "sk-..."
max_tokens = 4096
temperature = 0.0
model = "claude-3-7-sonnet" # The LLM model to use
base_url = "https://api.openai.com/v1" # API endpoint URL
api_key = "sk-..." # Your API key
max_tokens = 8192 # Maximum number of tokens in the response
temperature = 0.0 # Controls randomness
# [llm] #AZURE OPENAI:
# api_type= 'azure'
@ -17,9 +17,11 @@ temperature = 0.0
# Optional configuration for specific LLM models
[llm.vision]
model = "claude-3-5-sonnet"
base_url = "https://api.openai.com/v1"
api_key = "sk-..."
model = "claude-3-7-sonnet" # The vision model to use
base_url = "https://api.openai.com/v1" # API endpoint URL for vision model
api_key = "sk-..." # Your API key for vision model
max_tokens = 8192 # Maximum number of tokens in the response
temperature = 0.0 # Controls randomness for vision model
# Optional configuration for specific browser configuration
# [browser]