diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..1ef0e94 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,58 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + open-pull-requests-limit: 4 + groups: + # Group critical packages that might need careful review + core-dependencies: + patterns: + - "pydantic*" + - "openai" + - "fastapi" + - "tiktoken" + browsergym-related: + patterns: + - "browsergym*" + - "browser-use" + - "playwright" + search-tools: + patterns: + - "googlesearch-python" + - "baidusearch" + - "duckduckgo_search" + pre-commit: + patterns: + - "pre-commit" + security-all: + applies-to: "security-updates" + patterns: + - "*" + version-all: + applies-to: "version-updates" + patterns: + - "*" + exclude-patterns: + - "pydantic*" + - "openai" + - "fastapi" + - "tiktoken" + - "browsergym*" + - "browser-use" + - "playwright" + - "googlesearch-python" + - "baidusearch" + - "duckduckgo_search" + - "pre-commit" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + open-pull-requests-limit: 4 + groups: + actions: + patterns: + - "*" diff --git a/.github/workflows/environment-corrupt-check.yaml b/.github/workflows/environment-corrupt-check.yaml new file mode 100644 index 0000000..dc66fe0 --- /dev/null +++ b/.github/workflows/environment-corrupt-check.yaml @@ -0,0 +1,33 @@ +name: Environment Corruption Check +on: + push: + branches: ["main"] + paths: + - requirements.txt + pull_request: + branches: ["main"] + paths: + - requirements.txt +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }} + cancel-in-progress: true +jobs: + test-python-versions: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11.11", "3.12.8", "3.13.2"] + fail-fast: false + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: | + python -m pip install --upgrade pip + - name: Install dependencies + run: | + pip install -r requirements.txt diff --git a/.github/workflows/pr-autodiff.yaml b/.github/workflows/pr-autodiff.yaml new file mode 100644 index 0000000..ed218dc --- /dev/null +++ b/.github/workflows/pr-autodiff.yaml @@ -0,0 +1,127 @@ +name: PR Diff Summarization +on: + # pull_request: + # branches: [main] + # types: [opened, ready_for_review, reopened] + issue_comment: + types: [created] +permissions: + contents: read + pull-requests: write +jobs: + pr-diff-summarization: + runs-on: ubuntu-latest + if: | + (github.event_name == 'pull_request') || + (github.event_name == 'issue_comment' && + contains(github.event.comment.body, '!pr-diff') && + (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && + github.event.issue.pull_request) + steps: + - name: Get PR head SHA + id: get-pr-sha + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + echo "pr_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT + echo "Retrieved PR head SHA: ${{ github.event.pull_request.head.sha }}" + else + PR_URL="${{ github.event.issue.pull_request.url }}" + SHA=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL | jq -r '.head.sha') + echo "pr_sha=$SHA" >> $GITHUB_OUTPUT + echo "Retrieved PR head SHA from 
API: $SHA" + fi + - name: Check out code + uses: actions/checkout@v4 + with: + ref: ${{ steps.get-pr-sha.outputs.pr_sha }} + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install openai requests + - name: Create and run Python script + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} + run: |- + cat << 'EOF' > /tmp/_workflow_core.py + import os + import subprocess + import json + import requests + from openai import OpenAI + + def get_diff(): + result = subprocess.run( + ['git', 'diff', 'origin/main...HEAD'], + capture_output=True, text=True, check=True) + return '\n'.join( + line for line in result.stdout.split('\n') + if any(line.startswith(c) for c in ('+', '-')) + and not line.startswith(('---', '+++')) + )[:round(200000 * 0.4)] # Truncate to prevent overflow + + def generate_comment(diff_content): + client = OpenAI( + base_url=os.getenv("OPENAI_BASE_URL"), + api_key=os.getenv("OPENAI_API_KEY") + ) + + guidelines = ''' + 1. English version first, Chinese Simplified version after + 2. Example format: + # Diff Report + ## English + - Added `ABC` class + - Fixed `f()` behavior in `foo` module + + ### Comments Highlight + - `config.toml` needs to be configured properly to make sure new features work as expected. + + ### Spelling/Offensive Content Check + - No spelling mistakes or offensive content found in the code or comments. + 3. Highlight non-English comments + 4. Check for spelling/offensive content''' + + response = client.chat.completions.create( + model="o3-mini", + messages=[{ + "role": "system", + "content": "Generate bilingual code review feedback." 
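+ # A short system message pins the reviewer persona; the guidelines and the
+ # truncated diff are passed through the user message below.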
+ }, { + "role": "user", + "content": f"Review these changes per guidelines:\n{guidelines}\n\nDIFF:\n{diff_content}" + }] + ) + return response.choices[0].message.content + + def post_comment(comment): + repo = os.getenv("GITHUB_REPOSITORY") + pr_number = os.getenv("PR_NUMBER") + + headers = { + "Authorization": f"Bearer {os.getenv('GH_TOKEN')}", + "Accept": "application/vnd.github.v3+json" + } + url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" + + requests.post(url, json={"body": comment}, headers=headers) + + if __name__ == "__main__": + diff_content = get_diff() + if not diff_content.strip(): + print("No meaningful diff detected.") + exit(0) + + comment = generate_comment(diff_content) + post_comment(comment) + print("Comment posted successfully.") + EOF + + python /tmp/_workflow_core.py diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index 70d8458..ea52562 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -11,7 +11,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v5 + - uses: actions/stale@v9 with: days-before-issue-stale: 30 days-before-issue-close: 14 diff --git a/.github/workflows/top-issues.yaml b/.github/workflows/top-issues.yaml new file mode 100644 index 0000000..47b6bf0 --- /dev/null +++ b/.github/workflows/top-issues.yaml @@ -0,0 +1,27 @@ +name: Top issues +on: + schedule: + - cron: '0 0/2 * * *' + workflow_dispatch: +jobs: + ShowAndLabelTopIssues: + permissions: + issues: write + pull-requests: write + actions: read + contents: read + name: Display and label top issues + runs-on: ubuntu-latest + if: github.repository == 'mannaandpoem/OpenManus' + steps: + - name: Run top issues action + uses: rickstaa/top-issues-action@7e8dda5d5ae3087670f9094b9724a9a091fc3ba1 # v1.3.101 + env: + github_token: ${{ secrets.GITHUB_TOKEN }} + with: + label: true + dashboard: true + dashboard_show_total_reactions: true + top_issues: true + top_pull_requests: true + top_list_size: 32 diff --git a/.gitignore b/.gitignore index 653fd83..ff8e80d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,14 @@ +### Project-specific ### +# Logs +logs/ + +# Data +data/ + +# Workspace +workspace/ + +### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -170,11 +181,16 @@ cython_debug/ # PyPI configuration file .pypirc -# Logs -logs/ +### Visual Studio Code ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets -# Data -data/ +# Local History for Visual Studio Code +.history/ -# Workspace -workspace/ +# Built Visual Studio Code Extensions +*.vsix diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..f2c6cd0 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,8 @@ +{ + "recommendations": [ + "tamasfe.even-better-toml", + "ms-python.black-formatter", + "ms-python.isort" + ], + "unwantedRecommendations": [] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d3aa302 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,20 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + } + }, + "[toml]": { + "editor.defaultFormatter": "tamasfe.even-better-toml", + }, + "pre-commit-helper.runOnSave": "none", + "pre-commit-helper.config": ".pre-commit-config.yaml", + "evenBetterToml.schema.enabled": true, + 
"evenBetterToml.schema.associations": { + "^.+config[/\\\\].+\\.toml$": "../config/schema.config.json" + }, + "files.insertFinalNewline": true, + "files.trimTrailingWhitespace": true, + "editor.formatOnSave": true +} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..9f7a190 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.12-slim + +WORKDIR /app/OpenManus + +RUN apt-get update && apt-get install -y --no-install-recommends git curl \ + && rm -rf /var/lib/apt/lists/* \ + && (command -v uv >/dev/null 2>&1 || pip install --no-cache-dir uv) + +COPY . . + +RUN uv pip install --system -r requirements.txt + +CMD ["bash"] diff --git a/README.md b/README.md index ee33f75..d8e5bb0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +
<p align="center">
+ <img src="assets/logo.jpg" alt="OpenManus logo">
+</p>
+ English | [中文](README_zh.md) | [한국어](README_ko.md) | [日本語](README_ja.md) [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers) @@ -65,7 +69,7 @@ cd OpenManus 3. Create a new virtual environment and activate it: ```bash -uv venv +uv venv --python 3.12 source .venv/bin/activate # On Unix/macOS # Or on Windows: # .venv\Scripts\activate @@ -127,6 +131,8 @@ We welcome any friendly suggestions and helpful contributions! Just create issue Or contact @mannaandpoem via 📧email: mannaandpoem@gmail.com +**Note**: Before submitting a pull request, please use the pre-commit tool to check your changes. Run `pre-commit run --all-files` to execute the checks. + ## Community Group Join our networking group on Feishu and share your experience with other developers! @@ -143,7 +149,7 @@ Join our networking group on Feishu and share your experience with other develop Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) and [browser-use](https://github.com/browser-use/browser-use) for providing basic support for this project! -Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT) and [OpenHands](https://github.com/All-Hands-AI/OpenHands). +Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands) and [SWE-agent](https://github.com/SWE-agent/SWE-agent). OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community! diff --git a/README_ja.md b/README_ja.md index 668d9e3..71e5b68 100644 --- a/README_ja.md +++ b/README_ja.md @@ -1,5 +1,8 @@ -[English](README.md) | [中文](README_zh.md) | [한국어](README_ko.md) | 日本語 +
<p align="center">
+ <img src="assets/logo.jpg" alt="OpenManus logo">
+</p>
+[English](README.md) | [中文](README_zh.md) | [한국어](README_ko.md) | 日本語 [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)   @@ -66,7 +69,7 @@ cd OpenManus 3. 新しい仮想環境を作成してアクティベートします: ```bash -uv venv +uv venv --python 3.12 source .venv/bin/activate # Unix/macOSの場合 # Windowsの場合: # .venv\Scripts\activate @@ -128,6 +131,8 @@ python run_flow.py または @mannaandpoem に📧メールでご連絡ください:mannaandpoem@gmail.com +**注意**: プルリクエストを送信する前に、pre-commitツールを使用して変更を確認してください。`pre-commit run --all-files`を実行してチェックを実行します。 + ## コミュニティグループ Feishuのネットワーキンググループに参加して、他の開発者と経験を共有しましょう! @@ -144,7 +149,7 @@ Feishuのネットワーキンググループに参加して、他の開発者 このプロジェクトの基本的なサポートを提供してくれた[anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) と[browser-use](https://github.com/browser-use/browser-use)に感謝します! -さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)にも感謝します。 +さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)、[SWE-agent](https://github.com/SWE-agent/SWE-agent)にも感謝します。 OpenManusはMetaGPTのコントリビューターによって構築されました。このエージェントコミュニティに大きな感謝を! diff --git a/README_ko.md b/README_ko.md index 5cefd84..1a00afb 100644 --- a/README_ko.md +++ b/README_ko.md @@ -1,5 +1,8 @@ -[English](README.md) | [中文](README_zh.md) | 한국어 | [日本語](README_ja.md) +
<p align="center">
+ <img src="assets/logo.jpg" alt="OpenManus logo">
+</p>
+[English](README.md) | [中文](README_zh.md) | 한국어 | [日本語](README_ja.md) [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)   @@ -66,7 +69,7 @@ cd OpenManus 3. 새로운 가상 환경을 생성하고 활성화합니다: ```bash -uv venv +uv venv --python 3.12 source .venv/bin/activate # Unix/macOS의 경우 # Windows의 경우: # .venv\Scripts\activate @@ -128,6 +131,8 @@ python run_flow.py 또는 📧 메일로 연락주세요. @mannaandpoem : mannaandpoem@gmail.com +**참고**: pull request를 제출하기 전에 pre-commit 도구를 사용하여 변경 사항을 확인하십시오. `pre-commit run --all-files`를 실행하여 검사를 실행합니다. + ## 커뮤니티 그룹 Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공유하세요! @@ -144,7 +149,7 @@ Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공 이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와 [browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다! -또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands)에 깊은 감사를 드립니다. +또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands), [SWE-agent](https://github.com/SWE-agent/SWE-agent)에 깊은 감사를 드립니다. OpenManus는 MetaGPT 기여자들에 의해 개발되었습니다. 이 에이전트 커뮤니티에 깊은 감사를 전합니다! diff --git a/README_zh.md b/README_zh.md index 28f6749..15e010b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,8 +1,9 @@ +
<p align="center">
+ <img src="assets/logo.jpg" alt="OpenManus logo">
+</p>
[English](README.md) | 中文 | [한국어](README_ko.md) | [日本語](README_ja.md) - - [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)   [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)   @@ -69,7 +70,7 @@ cd OpenManus 3. 创建并激活虚拟环境: ```bash -uv venv +uv venv --python 3.12 source .venv/bin/activate # Unix/macOS 系统 # Windows 系统使用: # .venv\Scripts\activate @@ -119,7 +120,7 @@ python main.py 然后通过终端输入你的创意! -如需体验开发中版本,可运行: +如需体验不稳定的开发版本,可运行: ```bash python run_flow.py @@ -131,6 +132,8 @@ python run_flow.py 或通过 📧 邮件联系 @mannaandpoem:mannaandpoem@gmail.com +**注意**: 在提交 pull request 之前,请使用 pre-commit 工具检查您的更改。运行 `pre-commit run --all-files` 来执行检查。 + ## 交流群 加入我们的飞书交流群,与其他开发者分享经验! @@ -148,7 +151,7 @@ python run_flow.py 特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) 和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持! -此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT) 和 [OpenHands](https://github.com/All-Hands-AI/OpenHands). +此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT),[OpenHands](https://github.com/All-Hands-AI/OpenHands) 和 [SWE-agent](https://github.com/SWE-agent/SWE-agent). OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力的智能体开发者社区! diff --git a/app/agent/base.py b/app/agent/base.py index 3830365..fa3db30 100644 --- a/app/agent/base.py +++ b/app/agent/base.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, Field, model_validator from app.llm import LLM from app.logger import logger -from app.schema import AgentState, Memory, Message, ROLE_TYPE +from app.schema import ROLE_TYPE, AgentState, Memory, Message class BaseAgent(BaseModel, ABC): @@ -82,7 +82,7 @@ class BaseAgent(BaseModel, ABC): def update_memory( self, - role: ROLE_TYPE, # type: ignore + role: ROLE_TYPE, # type: ignore content: str, **kwargs, ) -> None: diff --git a/app/agent/manus.py b/app/agent/manus.py index e11ca45..4638c37 100644 --- a/app/agent/manus.py +++ b/app/agent/manus.py @@ -7,8 +7,8 @@ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT from app.tool import Terminate, ToolCollection from app.tool.browser_use_tool import BrowserUseTool from app.tool.file_saver import FileSaver -from app.tool.google_search import GoogleSearch from app.tool.python_execute import PythonExecute +from app.tool.web_search import WebSearch class Manus(ToolCallAgent): @@ -34,10 +34,13 @@ class Manus(ToolCallAgent): # Add general-purpose tools to the tool collection available_tools: ToolCollection = Field( default_factory=lambda: ToolCollection( - PythonExecute(), GoogleSearch(), BrowserUseTool(), FileSaver(), Terminate() + PythonExecute(), WebSearch(), BrowserUseTool(), FileSaver(), Terminate() ) ) async def _handle_special_tool(self, name: str, result: Any, **kwargs): - await self.available_tools.get_tool(BrowserUseTool().name).cleanup() - await super()._handle_special_tool(name, result, **kwargs) + if not self._is_special_tool(name): + return + else: + await self.available_tools.get_tool(BrowserUseTool().name).cleanup() + await super()._handle_special_tool(name, result, **kwargs) diff --git a/app/agent/planning.py b/app/agent/planning.py index cbd15a0..7e98912 100644 --- a/app/agent/planning.py +++ b/app/agent/planning.py @@ -6,7 +6,7 @@ from pydantic import Field, model_validator from app.agent.toolcall 
import ToolCallAgent from app.logger import logger from app.prompt.planning import NEXT_STEP_PROMPT, PLANNING_SYSTEM_PROMPT -from app.schema import Message, TOOL_CHOICE_TYPE, ToolCall, ToolChoice +from app.schema import TOOL_CHOICE_TYPE, Message, ToolCall, ToolChoice from app.tool import PlanningTool, Terminate, ToolCollection @@ -27,7 +27,7 @@ class PlanningAgent(ToolCallAgent): available_tools: ToolCollection = Field( default_factory=lambda: ToolCollection(PlanningTool(), Terminate()) ) - tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore + tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name]) tool_calls: List[ToolCall] = Field(default_factory=list) @@ -212,7 +212,7 @@ class PlanningAgent(ToolCallAgent): messages=messages, system_msgs=[Message.system_message(self.system_prompt)], tools=self.available_tools.to_params(), - tool_choice=ToolChoice.REQUIRED, + tool_choice=ToolChoice.AUTO, ) assistant_msg = Message.from_tool_calls( content=response.content, tool_calls=response.tool_calls diff --git a/app/agent/toolcall.py b/app/agent/toolcall.py index 1f04784..29e5af4 100644 --- a/app/agent/toolcall.py +++ b/app/agent/toolcall.py @@ -1,13 +1,13 @@ import json - -from typing import Any, List, Literal, Optional, Union +from typing import Any, List, Optional, Union from pydantic import Field from app.agent.react import ReActAgent +from app.exceptions import TokenLimitExceeded from app.logger import logger from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT -from app.schema import AgentState, Message, ToolCall, TOOL_CHOICE_TYPE, ToolChoice +from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice from app.tool import CreateChatCompletion, Terminate, ToolCollection @@ -26,7 +26,7 @@ class ToolCallAgent(ReActAgent): available_tools: ToolCollection = ToolCollection( CreateChatCompletion(), Terminate() ) - tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore + tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name]) tool_calls: List[ToolCall] = Field(default_factory=list) @@ -40,15 +40,34 @@ class ToolCallAgent(ReActAgent): user_msg = Message.user_message(self.next_step_prompt) self.messages += [user_msg] - # Get response with tool options - response = await self.llm.ask_tool( - messages=self.messages, - system_msgs=[Message.system_message(self.system_prompt)] - if self.system_prompt - else None, - tools=self.available_tools.to_params(), - tool_choice=self.tool_choices, - ) + try: + # Get response with tool options + response = await self.llm.ask_tool( + messages=self.messages, + system_msgs=[Message.system_message(self.system_prompt)] + if self.system_prompt + else None, + tools=self.available_tools.to_params(), + tool_choice=self.tool_choices, + ) + except ValueError: + raise + except Exception as e: + # Check if this is a RetryError containing TokenLimitExceeded + if hasattr(e, "__cause__") and isinstance(e.__cause__, TokenLimitExceeded): + token_limit_error = e.__cause__ + logger.error( + f"🚨 Token limit error (from RetryError): {token_limit_error}" + ) + self.memory.add_message( + Message.assistant_message( + f"Maximum token limit reached, cannot continue execution: {str(token_limit_error)}" + ) + ) + self.state = AgentState.FINISHED + return False + raise + self.tool_calls = response.tool_calls # Log response info diff --git a/app/config.py b/app/config.py 
index 64f478d..51356a0 100644 --- a/app/config.py +++ b/app/config.py @@ -20,6 +20,10 @@ class LLMSettings(BaseModel): base_url: str = Field(..., description="API base URL") api_key: str = Field(..., description="API key") max_tokens: int = Field(4096, description="Maximum number of tokens per request") + max_input_tokens: Optional[int] = Field( + None, + description="Maximum input tokens to use across all requests (None for unlimited)", + ) temperature: float = Field(1.0, description="Sampling temperature") api_type: str = Field(..., description="AzureOpenai or Openai") api_version: str = Field(..., description="Azure Openai version if AzureOpenai") @@ -31,6 +35,10 @@ class ProxySettings(BaseModel): password: Optional[str] = Field(None, description="Proxy password") +class SearchSettings(BaseModel): + engine: str = Field(default="Google", description="Search engine the llm to use") + + class BrowserSettings(BaseModel): headless: bool = Field(False, description="Whether to run browser in headless mode") disable_security: bool = Field( @@ -58,6 +66,9 @@ class AppConfig(BaseModel): browser_config: Optional[BrowserSettings] = Field( None, description="Browser configuration" ) + search_config: Optional[SearchSettings] = Field( + None, description="Search configuration" + ) class Config: arbitrary_types_allowed = True @@ -111,6 +122,7 @@ class Config: "base_url": base_llm.get("base_url"), "api_key": base_llm.get("api_key"), "max_tokens": base_llm.get("max_tokens", 4096), + "max_input_tokens": base_llm.get("max_input_tokens"), "temperature": base_llm.get("temperature", 1.0), "api_type": base_llm.get("api_type", ""), "api_version": base_llm.get("api_version", ""), @@ -149,6 +161,11 @@ class Config: if valid_browser_params: browser_settings = BrowserSettings(**valid_browser_params) + search_config = raw_config.get("search", {}) + search_settings = None + if search_config: + search_settings = SearchSettings(**search_config) + config_dict = { "llm": { "default": default_settings, @@ -158,6 +175,7 @@ class Config: }, }, "browser_config": browser_settings, + "search_config": search_settings, } self._config = AppConfig(**config_dict) @@ -170,5 +188,9 @@ class Config: def browser_config(self) -> Optional[BrowserSettings]: return self._config.browser_config + @property + def search_config(self) -> Optional[SearchSettings]: + return self._config.search_config + config = Config() diff --git a/app/exceptions.py b/app/exceptions.py index 57a0148..fc90087 100644 --- a/app/exceptions.py +++ b/app/exceptions.py @@ -3,3 +3,11 @@ class ToolError(Exception): def __init__(self, message): self.message = message + + +class OpenManusError(Exception): + """Base exception for all OpenManus errors""" + + +class TokenLimitExceeded(OpenManusError): + """Exception raised when the token limit is exceeded""" diff --git a/app/flow/planning.py b/app/flow/planning.py index a12bbe4..55ec5c9 100644 --- a/app/flow/planning.py +++ b/app/flow/planning.py @@ -124,7 +124,7 @@ class PlanningFlow(BaseFlow): messages=[user_message], system_msgs=[system_message], tools=[self.planning_tool.to_param()], - tool_choice=ToolChoice.REQUIRED, + tool_choice=ToolChoice.AUTO, ) # Process tool calls if present diff --git a/app/llm.py b/app/llm.py index 5b599e8..18a13af 100644 --- a/app/llm.py +++ b/app/llm.py @@ -1,5 +1,6 @@ from typing import Dict, List, Optional, Union +import tiktoken from openai import ( APIError, AsyncAzureOpenAI, @@ -8,11 +9,26 @@ from openai import ( OpenAIError, RateLimitError, ) -from tenacity import retry, 
stop_after_attempt, wait_random_exponential +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_random_exponential, +) from app.config import LLMSettings, config +from app.exceptions import TokenLimitExceeded from app.logger import logger # Assuming a logger is set up in your app -from app.schema import Message, TOOL_CHOICE_TYPE, ROLE_VALUES, TOOL_CHOICE_VALUES, ToolChoice +from app.schema import ( + ROLE_VALUES, + TOOL_CHOICE_TYPE, + TOOL_CHOICE_VALUES, + Message, + ToolChoice, +) + + +REASONING_MODELS = ["o1", "o3-mini"] class LLM: @@ -40,6 +56,22 @@ class LLM: self.api_key = llm_config.api_key self.api_version = llm_config.api_version self.base_url = llm_config.base_url + + # Add token counting related attributes + self.total_input_tokens = 0 + self.max_input_tokens = ( + llm_config.max_input_tokens + if hasattr(llm_config, "max_input_tokens") + else None + ) + + # Initialize tokenizer + try: + self.tokenizer = tiktoken.encoding_for_model(self.model) + except KeyError: + # If the model is not in tiktoken's presets, use cl100k_base as default + self.tokenizer = tiktoken.get_encoding("cl100k_base") + if self.api_type == "azure": self.client = AsyncAzureOpenAI( base_url=self.base_url, @@ -49,6 +81,79 @@ class LLM: else: self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url) + def count_tokens(self, text: str) -> int: + """Calculate the number of tokens in a text""" + if not text: + return 0 + return len(self.tokenizer.encode(text)) + + def count_message_tokens(self, messages: List[dict]) -> int: + """Calculate the number of tokens in a message list""" + token_count = 0 + for message in messages: + # Base token count for each message (according to OpenAI's calculation method) + token_count += 4 # Base token count for each message + + # Calculate tokens for the role + if "role" in message: + token_count += self.count_tokens(message["role"]) + + # Calculate tokens for the content + if "content" in message and message["content"]: + token_count += self.count_tokens(message["content"]) + + # Calculate tokens for tool calls + if "tool_calls" in message and message["tool_calls"]: + for tool_call in message["tool_calls"]: + if "function" in tool_call: + # Function name + if "name" in tool_call["function"]: + token_count += self.count_tokens( + tool_call["function"]["name"] + ) + # Function arguments + if "arguments" in tool_call["function"]: + token_count += self.count_tokens( + tool_call["function"]["arguments"] + ) + + # Calculate tokens for tool responses + if "name" in message and message["name"]: + token_count += self.count_tokens(message["name"]) + + if "tool_call_id" in message and message["tool_call_id"]: + token_count += self.count_tokens(message["tool_call_id"]) + + # Add extra tokens for message format + token_count += 2 # Extra tokens for message format + + return token_count + + def update_token_count(self, input_tokens: int) -> None: + """Update token counts""" + # Only track tokens if max_input_tokens is set + self.total_input_tokens += input_tokens + logger.info( + f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}" + ) + + def check_token_limit(self, input_tokens: int) -> bool: + """Check if token limits are exceeded""" + if self.max_input_tokens is not None: + return (self.total_input_tokens + input_tokens) <= self.max_input_tokens + # If max_input_tokens is not set, always return True + return True + + def get_limit_error_message(self, input_tokens: int) -> str: + """Generate error message for 
token limit exceeded""" + if ( + self.max_input_tokens is not None + and (self.total_input_tokens + input_tokens) > self.max_input_tokens + ): + return f"Request may exceed input token limit (Current: {self.total_input_tokens}, Needed: {input_tokens}, Max: {self.max_input_tokens})" + + return "Token limit exceeded" + @staticmethod def format_messages(messages: List[Union[dict, Message]]) -> List[dict]: """ @@ -75,14 +180,15 @@ class LLM: formatted_messages = [] for message in messages: + if isinstance(message, Message): + message = message.to_dict() if isinstance(message, dict): - # If message is already a dict, ensure it has required fields + # If message is a dict, ensure it has required fields if "role" not in message: raise ValueError("Message dict must contain 'role' field") - formatted_messages.append(message) - elif isinstance(message, Message): - # If message is a Message object, convert it to dict - formatted_messages.append(message.to_dict()) + if "content" in message or "tool_calls" in message: + formatted_messages.append(message) + # else: do not include the message else: raise TypeError(f"Unsupported message type: {type(message)}") @@ -90,16 +196,15 @@ class LLM: for msg in formatted_messages: if msg["role"] not in ROLE_VALUES: raise ValueError(f"Invalid role: {msg['role']}") - if "content" not in msg and "tool_calls" not in msg: - raise ValueError( - "Message must contain either 'content' or 'tool_calls'" - ) return formatted_messages @retry( wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6), + retry=retry_if_exception_type( + (OpenAIError, Exception, ValueError) + ), # Don't retry TokenLimitExceeded ) async def ask( self, @@ -121,6 +226,7 @@ class LLM: str: The generated response Raises: + TokenLimitExceeded: If token limits are exceeded ValueError: If messages are invalid or response is empty OpenAIError: If API call fails after retries Exception: For unexpected errors @@ -133,27 +239,47 @@ class LLM: else: messages = self.format_messages(messages) + # Calculate input token count + input_tokens = self.count_message_tokens(messages) + + # Check if token limits are exceeded + if not self.check_token_limit(input_tokens): + error_message = self.get_limit_error_message(input_tokens) + # Raise a special exception that won't be retried + raise TokenLimitExceeded(error_message) + + params = { + "model": self.model, + "messages": messages, + } + + if self.model in REASONING_MODELS: + params["max_completion_tokens"] = self.max_tokens + else: + params["max_tokens"] = self.max_tokens + params["temperature"] = ( + temperature if temperature is not None else self.temperature + ) + if not stream: # Non-streaming request - response = await self.client.chat.completions.create( - model=self.model, - messages=messages, - max_tokens=self.max_tokens, - temperature=temperature or self.temperature, - stream=False, - ) + params["stream"] = False + + response = await self.client.chat.completions.create(**params) + if not response.choices or not response.choices[0].message.content: raise ValueError("Empty or invalid response from LLM") + + # Update token counts + self.update_token_count(response.usage.prompt_tokens) + return response.choices[0].message.content - # Streaming request - response = await self.client.chat.completions.create( - model=self.model, - messages=messages, - max_tokens=self.max_tokens, - temperature=temperature or self.temperature, - stream=True, - ) + # Streaming request, For streaming, update estimated token count before making the request + 
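# The streaming branch below never reads a usage object from the response,
+ # so this pre-computed estimate stands in for the real prompt token count.
+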
self.update_token_count(input_tokens) + + params["stream"] = True + response = await self.client.chat.completions.create(**params) collected_messages = [] async for chunk in response: @@ -165,13 +291,23 @@ class LLM: full_response = "".join(collected_messages).strip() if not full_response: raise ValueError("Empty response from streaming LLM") + return full_response + except TokenLimitExceeded: + # Re-raise token limit errors without logging + raise except ValueError as ve: logger.error(f"Validation error: {ve}") raise except OpenAIError as oe: logger.error(f"OpenAI API error: {oe}") + if isinstance(oe, AuthenticationError): + logger.error("Authentication failed. Check API key.") + elif isinstance(oe, RateLimitError): + logger.error("Rate limit exceeded. Consider increasing retry attempts.") + elif isinstance(oe, APIError): + logger.error(f"API error: {oe}") raise except Exception as e: logger.error(f"Unexpected error in ask: {e}") @@ -180,6 +316,9 @@ class LLM: @retry( wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6), + retry=retry_if_exception_type( + (OpenAIError, Exception, ValueError) + ), # Don't retry TokenLimitExceeded ) async def ask_tool( self, @@ -187,7 +326,7 @@ class LLM: system_msgs: Optional[List[Union[dict, Message]]] = None, timeout: int = 300, tools: Optional[List[dict]] = None, - tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore + tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore temperature: Optional[float] = None, **kwargs, ): @@ -207,6 +346,7 @@ class LLM: ChatCompletionMessage: The model's response Raises: + TokenLimitExceeded: If token limits are exceeded ValueError: If tools, tool_choice, or messages are invalid OpenAIError: If API call fails after retries Exception: For unexpected errors @@ -223,6 +363,23 @@ class LLM: else: messages = self.format_messages(messages) + # Calculate input token count + input_tokens = self.count_message_tokens(messages) + + # If there are tools, calculate token count for tool descriptions + tools_tokens = 0 + if tools: + for tool in tools: + tools_tokens += self.count_tokens(str(tool)) + + input_tokens += tools_tokens + + # Check if token limits are exceeded + if not self.check_token_limit(input_tokens): + error_message = self.get_limit_error_message(input_tokens) + # Raise a special exception that won't be retried + raise TokenLimitExceeded(error_message) + # Validate tools if provided if tools: for tool in tools: @@ -230,28 +387,43 @@ class LLM: raise ValueError("Each tool must be a dict with 'type' field") # Set up the completion request - response = await self.client.chat.completions.create( - model=self.model, - messages=messages, - temperature=temperature or self.temperature, - max_tokens=self.max_tokens, - tools=tools, - tool_choice=tool_choice, - timeout=timeout, + params = { + "model": self.model, + "messages": messages, + "tools": tools, + "tool_choice": tool_choice, + "timeout": timeout, **kwargs, - ) + } + + if self.model in REASONING_MODELS: + params["max_completion_tokens"] = self.max_tokens + else: + params["max_tokens"] = self.max_tokens + params["temperature"] = ( + temperature if temperature is not None else self.temperature + ) + + response = await self.client.chat.completions.create(**params) # Check if response is valid if not response.choices or not response.choices[0].message: print(response) raise ValueError("Invalid or empty response from LLM") + # Update token counts + self.update_token_count(response.usage.prompt_tokens) + return response.choices[0].message + 
except TokenLimitExceeded: + # Re-raise token limit errors without logging + raise except ValueError as ve: logger.error(f"Validation error in ask_tool: {ve}") raise except OpenAIError as oe: + logger.error(f"OpenAI API error: {oe}") if isinstance(oe, AuthenticationError): logger.error("Authentication failed. Check API key.") elif isinstance(oe, RateLimitError): diff --git a/app/prompt/manus.py b/app/prompt/manus.py index e46c793..6dcca8a 100644 --- a/app/prompt/manus.py +++ b/app/prompt/manus.py @@ -8,7 +8,7 @@ FileSaver: Save files locally, such as txt, py, html, etc. BrowserUseTool: Open, browse, and use web browsers.If you open a local HTML file, you must provide the absolute path to the file. -GoogleSearch: Perform web information retrieval +WebSearch: Perform web information retrieval Terminate: End the current interaction when the task is complete or when you need additional information from the user. Use this tool to signal that you've finished addressing the user's request or need clarification before proceeding further. diff --git a/app/schema.py b/app/schema.py index 30ccf6c..fb89c3c 100644 --- a/app/schema.py +++ b/app/schema.py @@ -3,25 +3,32 @@ from typing import Any, List, Literal, Optional, Union from pydantic import BaseModel, Field + class Role(str, Enum): """Message role options""" + SYSTEM = "system" USER = "user" - ASSISTANT = "assistant" + ASSISTANT = "assistant" TOOL = "tool" + ROLE_VALUES = tuple(role.value for role in Role) ROLE_TYPE = Literal[ROLE_VALUES] # type: ignore + class ToolChoice(str, Enum): """Tool choice options""" + NONE = "none" AUTO = "auto" REQUIRED = "required" + TOOL_CHOICE_VALUES = tuple(choice.value for choice in ToolChoice) TOOL_CHOICE_TYPE = Literal[TOOL_CHOICE_VALUES] # type: ignore + class AgentState(str, Enum): """Agent execution states""" @@ -47,7 +54,7 @@ class ToolCall(BaseModel): class Message(BaseModel): """Represents a chat message in the conversation""" - role: ROLE_TYPE = Field(...) # type: ignore + role: ROLE_TYPE = Field(...) # type: ignore content: Optional[str] = Field(default=None) tool_calls: Optional[List[ToolCall]] = Field(default=None) name: Optional[str] = Field(default=None) @@ -104,7 +111,9 @@ class Message(BaseModel): @classmethod def tool_message(cls, content: str, name, tool_call_id: str) -> "Message": """Create a tool message""" - return cls(role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id) + return cls( + role=Role.TOOL, content=content, name=name, tool_call_id=tool_call_id + ) @classmethod def from_tool_calls( diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py index 57ad03c..ad0cfa1 100644 --- a/app/tool/browser_use_tool.py +++ b/app/tool/browser_use_tool.py @@ -106,7 +106,7 @@ class BrowserUseTool(BaseTool): async def _ensure_browser_initialized(self) -> BrowserContext: """Ensure browser and context are initialized.""" if self.browser is None: - browser_config_kwargs = {"headless": False} + browser_config_kwargs = {"headless": False, "disable_security": True} if config.browser_config: from browser_use.browser.browser import ProxySettings diff --git a/app/tool/file_saver.py b/app/tool/file_saver.py index d6a3766..7d92a02 100644 --- a/app/tool/file_saver.py +++ b/app/tool/file_saver.py @@ -2,6 +2,7 @@ import os import aiofiles +from app.config import WORKSPACE_ROOT from app.tool.base import BaseTool @@ -45,15 +46,22 @@ The tool accepts content and a file path, and saves the content to that location str: A message indicating the result of the operation. 
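+
+ Note: absolute file_path values are re-rooted into WORKSPACE_ROOT (only the basename is kept), and relative paths are joined under WORKSPACE_ROOT.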
""" try: + # Place the generated file in the workspace directory + if os.path.isabs(file_path): + file_name = os.path.basename(file_path) + full_path = os.path.join(WORKSPACE_ROOT, file_name) + else: + full_path = os.path.join(WORKSPACE_ROOT, file_path) + # Ensure the directory exists - directory = os.path.dirname(file_path) + directory = os.path.dirname(full_path) if directory and not os.path.exists(directory): os.makedirs(directory) # Write directly to the file - async with aiofiles.open(file_path, mode, encoding="utf-8") as file: + async with aiofiles.open(full_path, mode, encoding="utf-8") as file: await file.write(content) - return f"Content successfully saved to {file_path}" + return f"Content successfully saved to {full_path}" except Exception as e: return f"Error saving file: {str(e)}" diff --git a/app/tool/google_search.py b/app/tool/google_search.py deleted file mode 100644 index ed5d7d5..0000000 --- a/app/tool/google_search.py +++ /dev/null @@ -1,48 +0,0 @@ -import asyncio -from typing import List - -from googlesearch import search - -from app.tool.base import BaseTool - - -class GoogleSearch(BaseTool): - name: str = "google_search" - description: str = """Perform a Google search and return a list of relevant links. -Use this tool when you need to find information on the web, get up-to-date data, or research specific topics. -The tool returns a list of URLs that match the search query. -""" - parameters: dict = { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "(required) The search query to submit to Google.", - }, - "num_results": { - "type": "integer", - "description": "(optional) The number of search results to return. Default is 10.", - "default": 10, - }, - }, - "required": ["query"], - } - - async def execute(self, query: str, num_results: int = 10) -> List[str]: - """ - Execute a Google search and return a list of URLs. - - Args: - query (str): The search query to submit to Google. - num_results (int, optional): The number of search results to return. Default is 10. - - Returns: - List[str]: A list of URLs matching the search query. - """ - # Run the search in a thread pool to prevent blocking - loop = asyncio.get_event_loop() - links = await loop.run_in_executor( - None, lambda: list(search(query, num_results=num_results)) - ) - - return links diff --git a/app/tool/python_execute.py b/app/tool/python_execute.py index 88e1aab..08ceffa 100644 --- a/app/tool/python_execute.py +++ b/app/tool/python_execute.py @@ -1,4 +1,6 @@ -import threading +import multiprocessing +import sys +from io import StringIO from typing import Dict from app.tool.base import BaseTool @@ -20,6 +22,20 @@ class PythonExecute(BaseTool): "required": ["code"], } + def _run_code(self, code: str, result_dict: dict, safe_globals: dict) -> None: + original_stdout = sys.stdout + try: + output_buffer = StringIO() + sys.stdout = output_buffer + exec(code, safe_globals, safe_globals) + result_dict["observation"] = output_buffer.getvalue() + result_dict["success"] = True + except Exception as e: + result_dict["observation"] = str(e) + result_dict["success"] = False + finally: + sys.stdout = original_stdout + async def execute( self, code: str, @@ -35,36 +51,25 @@ class PythonExecute(BaseTool): Returns: Dict: Contains 'output' with execution output or error message and 'success' status. 
""" - result = {"observation": ""} - def run_code(): - try: - safe_globals = {"__builtins__": dict(__builtins__)} + with multiprocessing.Manager() as manager: + result = manager.dict({"observation": "", "success": False}) + if isinstance(__builtins__, dict): + safe_globals = {"__builtins__": __builtins__} + else: + safe_globals = {"__builtins__": __builtins__.__dict__.copy()} + proc = multiprocessing.Process( + target=self._run_code, args=(code, result, safe_globals) + ) + proc.start() + proc.join(timeout) - import sys - from io import StringIO - - output_buffer = StringIO() - sys.stdout = output_buffer - - exec(code, safe_globals, {}) - - sys.stdout = sys.__stdout__ - - result["observation"] = output_buffer.getvalue() - - except Exception as e: - result["observation"] = str(e) - result["success"] = False - - thread = threading.Thread(target=run_code) - thread.start() - thread.join(timeout) - - if thread.is_alive(): - return { - "observation": f"Execution timeout after {timeout} seconds", - "success": False, - } - - return result + # timeout process + if proc.is_alive(): + proc.terminate() + proc.join(1) + return { + "observation": f"Execution timeout after {timeout} seconds", + "success": False, + } + return dict(result) diff --git a/app/tool/search/__init__.py b/app/tool/search/__init__.py new file mode 100644 index 0000000..4f486ac --- /dev/null +++ b/app/tool/search/__init__.py @@ -0,0 +1,12 @@ +from app.tool.search.baidu_search import BaiduSearchEngine +from app.tool.search.base import WebSearchEngine +from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine +from app.tool.search.google_search import GoogleSearchEngine + + +__all__ = [ + "WebSearchEngine", + "BaiduSearchEngine", + "DuckDuckGoSearchEngine", + "GoogleSearchEngine", +] diff --git a/app/tool/search/baidu_search.py b/app/tool/search/baidu_search.py new file mode 100644 index 0000000..d415ce8 --- /dev/null +++ b/app/tool/search/baidu_search.py @@ -0,0 +1,9 @@ +from baidusearch.baidusearch import search + +from app.tool.search.base import WebSearchEngine + + +class BaiduSearchEngine(WebSearchEngine): + def perform_search(self, query, num_results=10, *args, **kwargs): + """Baidu search engine.""" + return search(query, num_results=num_results) diff --git a/app/tool/search/base.py b/app/tool/search/base.py new file mode 100644 index 0000000..3132381 --- /dev/null +++ b/app/tool/search/base.py @@ -0,0 +1,17 @@ +class WebSearchEngine(object): + def perform_search( + self, query: str, num_results: int = 10, *args, **kwargs + ) -> list[dict]: + """ + Perform a web search and return a list of URLs. + + Args: + query (str): The search query to submit to the search engine. + num_results (int, optional): The number of search results to return. Default is 10. + args: Additional arguments. + kwargs: Additional keyword arguments. + + Returns: + List: A list of dict matching the search query. 
+ """ + raise NotImplementedError diff --git a/app/tool/search/duckduckgo_search.py b/app/tool/search/duckduckgo_search.py new file mode 100644 index 0000000..3dd5c52 --- /dev/null +++ b/app/tool/search/duckduckgo_search.py @@ -0,0 +1,9 @@ +from duckduckgo_search import DDGS + +from app.tool.search.base import WebSearchEngine + + +class DuckDuckGoSearchEngine(WebSearchEngine): + async def perform_search(self, query, num_results=10, *args, **kwargs): + """DuckDuckGo search engine.""" + return DDGS.text(query, num_results=num_results) diff --git a/app/tool/search/google_search.py b/app/tool/search/google_search.py new file mode 100644 index 0000000..425106d --- /dev/null +++ b/app/tool/search/google_search.py @@ -0,0 +1,9 @@ +from googlesearch import search + +from app.tool.search.base import WebSearchEngine + + +class GoogleSearchEngine(WebSearchEngine): + def perform_search(self, query, num_results=10, *args, **kwargs): + """Google search engine.""" + return search(query, num_results=num_results) diff --git a/app/tool/terminal.py b/app/tool/terminal.py index df5996e..86b401c 100644 --- a/app/tool/terminal.py +++ b/app/tool/terminal.py @@ -40,7 +40,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that str: The output, and error of the command execution. """ # Split the command by & to handle multiple commands - commands = [cmd.strip() for cmd in command.split('&') if cmd.strip()] + commands = [cmd.strip() for cmd in command.split("&") if cmd.strip()] final_output = CLIResult(output="", error="") for cmd in commands: @@ -61,7 +61,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that stdout, stderr = await self.process.communicate() result = CLIResult( output=stdout.decode().strip(), - error=stderr.decode().strip() + error=stderr.decode().strip(), ) except Exception as e: result = CLIResult(output="", error=str(e)) @@ -70,9 +70,13 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that # Combine outputs if result.output: - final_output.output += (result.output + "\n") if final_output.output else result.output + final_output.output += ( + (result.output + "\n") if final_output.output else result.output + ) if result.error: - final_output.error += (result.error + "\n") if final_output.error else result.error + final_output.error += ( + (result.error + "\n") if final_output.error else result.error + ) # Remove trailing newlines final_output.output = final_output.output.rstrip() @@ -124,14 +128,10 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that if os.path.isdir(new_path): self.current_path = new_path return CLIResult( - output=f"Changed directory to {self.current_path}", - error="" + output=f"Changed directory to {self.current_path}", error="" ) else: - return CLIResult( - output="", - error=f"No such directory: {new_path}" - ) + return CLIResult(output="", error=f"No such directory: {new_path}") except Exception as e: return CLIResult(output="", error=str(e)) @@ -152,7 +152,7 @@ Note: You MUST append a `sleep 0.05` to the end of the command for commands that parts = shlex.split(command) if any(cmd in dangerous_commands for cmd in parts): raise ValueError("Use of dangerous commands is restricted.") - except Exception as e: + except Exception: # If shlex.split fails, try basic string comparison if any(cmd in command for cmd in dangerous_commands): raise ValueError("Use of dangerous commands is restricted.") diff --git a/app/tool/web_search.py b/app/tool/web_search.py new 
file mode 100644 index 0000000..7b1018b --- /dev/null +++ b/app/tool/web_search.py @@ -0,0 +1,99 @@ +import asyncio +from typing import List + +from tenacity import retry, stop_after_attempt, wait_exponential + +from app.config import config +from app.tool.base import BaseTool +from app.tool.search import ( + BaiduSearchEngine, + DuckDuckGoSearchEngine, + GoogleSearchEngine, + WebSearchEngine, +) + + +class WebSearch(BaseTool): + name: str = "web_search" + description: str = """Perform a web search and return a list of relevant links. + This function attempts to use the primary search engine API to get up-to-date results. + If an error occurs, it falls back to an alternative search engine.""" + parameters: dict = { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "(required) The search query to submit to the search engine.", + }, + "num_results": { + "type": "integer", + "description": "(optional) The number of search results to return. Default is 10.", + "default": 10, + }, + }, + "required": ["query"], + } + _search_engine: dict[str, WebSearchEngine] = { + "google": GoogleSearchEngine(), + "baidu": BaiduSearchEngine(), + "duckduckgo": DuckDuckGoSearchEngine(), + } + + async def execute(self, query: str, num_results: int = 10) -> List[str]: + """ + Execute a Web search and return a list of URLs. + + Args: + query (str): The search query to submit to the search engine. + num_results (int, optional): The number of search results to return. Default is 10. + + Returns: + List[str]: A list of URLs matching the search query. + """ + engine_order = self._get_engine_order() + for engine_name in engine_order: + engine = self._search_engine[engine_name] + try: + links = await self._perform_search_with_engine( + engine, query, num_results + ) + if links: + return links + except Exception as e: + print(f"Search engine '{engine_name}' failed with error: {e}") + return [] + + def _get_engine_order(self) -> List[str]: + """ + Determines the order in which to try search engines. + Preferred engine is first (based on configuration), followed by the remaining engines. + + Returns: + List[str]: Ordered list of search engine names. 
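+
+ Example (illustrative): with engine = "baidu" configured, the returned order is ["baidu", "google", "duckduckgo"].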
+ """ + preferred = "google" + if config.search_config and config.search_config.engine: + preferred = config.search_config.engine.lower() + + engine_order = [] + if preferred in self._search_engine: + engine_order.append(preferred) + for key in self._search_engine: + if key not in engine_order: + engine_order.append(key) + return engine_order + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=1, max=10), + ) + async def _perform_search_with_engine( + self, + engine: WebSearchEngine, + query: str, + num_results: int, + ) -> List[str]: + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + None, lambda: list(engine.perform_search(query, num_results=num_results)) + ) diff --git a/assets/logo.jpg b/assets/logo.jpg new file mode 100644 index 0000000..634b8f6 Binary files /dev/null and b/assets/logo.jpg differ diff --git a/config/.gitignore b/config/.gitignore new file mode 100644 index 0000000..eaff182 --- /dev/null +++ b/config/.gitignore @@ -0,0 +1,2 @@ +# prevent the local config file from being uploaded to the remote repository +config.toml diff --git a/config/config.example.toml b/config/config.example.toml index 13648dd..51b8ead 100644 --- a/config/config.example.toml +++ b/config/config.example.toml @@ -1,10 +1,10 @@ # Global LLM configuration [llm] -model = "claude-3-5-sonnet" -base_url = "https://api.openai.com/v1" -api_key = "sk-..." -max_tokens = 4096 -temperature = 0.0 +model = "claude-3-7-sonnet-20250219" # The LLM model to use +base_url = "https://api.anthropic.com/v1/" # API endpoint URL +api_key = "YOUR_API_KEY" # Your API key +max_tokens = 8192 # Maximum number of tokens in the response +temperature = 0.0 # Controls randomness # [llm] #AZURE OPENAI: # api_type= 'azure' @@ -15,11 +15,29 @@ temperature = 0.0 # temperature = 0.0 # api_version="AZURE API VERSION" #"2024-08-01-preview" +# [llm] #OLLAMA: +# api_type = 'ollama' +# model = "llama3.2" +# base_url = "http://localhost:11434/v1" +# api_key = "ollama" +# max_tokens = 4096 +# temperature = 0.0 + # Optional configuration for specific LLM models [llm.vision] -model = "claude-3-5-sonnet" -base_url = "https://api.openai.com/v1" -api_key = "sk-..." +model = "claude-3-7-sonnet-20250219" # The vision model to use +base_url = "https://api.anthropic.com/v1/" # API endpoint URL for vision model +api_key = "YOUR_API_KEY" # Your API key for vision model +max_tokens = 8192 # Maximum number of tokens in the response +temperature = 0.0 # Controls randomness for vision model + +# [llm.vision] #OLLAMA VISION: +# api_type = 'ollama' +# model = "llama3.2-vision" +# base_url = "http://localhost:11434/v1" +# api_key = "ollama" +# max_tokens = 4096 +# temperature = 0.0 # Optional configuration for specific browser configuration # [browser] @@ -42,3 +60,8 @@ api_key = "sk-..." # server = "http://proxy-server:port" # username = "proxy-username" # password = "proxy-password" + +# Optional configuration, Search settings. +# [search] +# Search engine for agent to use. Default is "Google", can be set to "Baidu" or "DuckDuckGo". 
+#engine = "Google" diff --git a/requirements.txt b/requirements.txt index 7ce4b52..2dcaabd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,12 @@ -pydantic~=2.10.4 -openai~=1.58.1 +pydantic~=2.10.6 +openai~=1.66.3 tenacity~=9.0.0 pyyaml~=6.0.2 loguru~=0.7.3 numpy datasets~=3.2.0 fastapi~=0.115.11 +tiktoken~=0.9.0 html2text~=2024.2.26 gymnasium~=1.0.0 @@ -15,8 +16,10 @@ uvicorn~=0.34.0 unidiff~=0.7.5 browser-use~=0.1.40 googlesearch-python~=1.3.0 +baidusearch~=1.0.3 +duckduckgo_search~=7.5.1 aiofiles~=24.1.0 pydantic_core~=2.27.2 colorama~=0.4.6 -playwright~=1.49.1 +playwright~=1.50.0 diff --git a/setup.py b/setup.py index dd46f9c..eb36dac 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( packages=find_packages(), install_requires=[ "pydantic~=2.10.4", - "openai~=1.58.1", + "openai>=1.58.1,<1.67.0", "tenacity~=9.0.0", "pyyaml~=6.0.2", "loguru~=0.7.3", @@ -31,7 +31,7 @@ setup( "browser-use~=0.1.40", "googlesearch-python~=1.3.0", "aiofiles~=24.1.0", - "pydantic_core~=2.27.2", + "pydantic_core>=2.27.2,<2.28.0", "colorama~=0.4.6", ], classifiers=[