Compare commits

...

52 Commits

Author SHA1 Message Date
Sheng Fan
c432ec9286 chore(boto3): restore boto3 dependency 2025-03-21 18:46:48 +08:00
Sheng Fan
65470c2ae9 style(pre-commit): fix 2025-03-21 18:25:58 +08:00
Sheng Fan
30342247c0 feat: Add shortcut script for launching OpenManus MCP server 2025-03-21 18:24:23 +08:00
liangxinbing
a61ef9b737 Comment boto3 to resolve dependency conflicts for requirements.txt 2025-03-21 17:54:22 +08:00
liangxinbing
82e3140357 update structure of flow 2025-03-21 17:37:16 +08:00
liangxinbing
d0492a500e update run_default for run_mcp.py 2025-03-21 17:36:29 +08:00
mannaandpoem
35209978e1
Merge pull request #883 from leeseett/main
Remove merge conflict hint code
2025-03-21 13:09:42 +08:00
leeseett
3dd990e554
Remove merge conflict hint code 2025-03-21 11:49:26 +08:00
Sheng Fan
e218c0655f
Merge pull request #882 from fred913/patch/mcp-server
Patch/mcp server
2025-03-21 11:28:03 +08:00
Sheng Fan
5d18b5dc69
chore: remove useless file 2025-03-21 11:25:16 +08:00
Sheng Fan
567bffb441 style: pre-commit 2025-03-21 11:23:55 +08:00
Sheng Fan
acb435f9f5
Merge branch 'mannaandpoem:main' into patch/mcp-server 2025-03-21 11:22:07 +08:00
mannaandpoem
d63e88f089
Merge pull request #772 from a-holm/fix-for-search-rate-limits
feat(search): Add configurable fallback engines and retry logic for robust web search
2025-03-21 01:56:21 +08:00
mannaandpoem
c3de3ad6f7
Merge pull request #862 from Shellmode/main
Add Amazon Bedrock support modular non-intrusive
2025-03-20 16:19:28 +08:00
Sheng Fan
08a20f6880 chore(mcp.server): remove irregular environment patch
refactor(mcp.server): prevent browser-use from affecting mcp stdio communication
2025-03-20 13:25:56 +08:00
Array
2a13cb49f3 feat: modular non-intrusive Amazon Bedrock support, add config example 2025-03-20 12:25:53 +08:00
Array
4e10b42b30 feat: modular non-intrusive Amazon Bedrock support 2025-03-20 12:15:34 +08:00
liangxinbing
d4358ef537 update requirements.txt 2025-03-20 09:36:27 +08:00
liangxinbing
dda83efaf0 add mcp prompt 2025-03-20 09:30:35 +08:00
liangxinbing
721d863642 update README 2025-03-20 09:16:10 +08:00
liangxinbing
6fa0b1be95 update SWEAgent 2025-03-20 09:02:50 +08:00
liangxinbing
e5808d1a90 update mcp_tools to mcp_clients 2025-03-20 09:02:28 +08:00
liangxinbing
74f438bde3 update mcp name 2025-03-20 02:03:20 +08:00
liangxinbing
8fca2ff1b7 Merge remote-tracking branch 'origin/main' 2025-03-20 01:39:58 +08:00
liangxinbing
14fa48e8d7 add mcp 2025-03-20 01:10:04 +08:00
Johan Holm
59a92257be Merge branch 'main' of https://github.com/a-holm/OpenManus into fix-for-search-rate-limits 2025-03-19 10:50:25 +01:00
Sheng Fan
f25ed7d49e
Merge pull request #839 from fred913/main
fix(browser_use_tool): reimplement screenshot logics to get JPEG data
2025-03-19 17:03:15 +08:00
Sheng Fan
44243a1b97 fix(browser_use_tool): reimplement screenshot logics to get JPEG data 2025-03-19 16:57:12 +08:00
mannaandpoem
3c7e378969
Merge pull request #797 from fred913/main
Several improvements
2025-03-19 15:53:29 +08:00
Sheng Fan
b9df45bc68 chore(app/tool): remove constructor in LocalFileOperator 2025-03-19 15:44:06 +08:00
Sheng Fan
d644d976b0 fix: pre-commit 2025-03-19 14:12:46 +08:00
Sheng Fan
7e3609f19f chore(file_operators): use utf-8 as default encoding 2025-03-19 14:10:07 +08:00
Sheng Fan
94e2ab7c86 fix(llm): accept empty choices as valid response and handle that case gracefully 2025-03-19 14:09:46 +08:00
Sheng Fan
4ea7f8e988 merge: main from upstream 2025-03-19 13:34:43 +08:00
Sheng Fan
d7b3f9a5c3 fix(pr-autodiff): make sure compare does correctly 2025-03-19 13:32:31 +08:00
liangxinbing
b7dcbfecb3 update extract_content action for BrowserUseTool 2025-03-19 13:27:08 +08:00
liangxinbing
402355533c format code 2025-03-19 13:24:49 +08:00
liangxinbing
7b38dd7fbc update format_messages 2025-03-19 13:24:12 +08:00
mannaandpoem
d5a662cbcc
Merge pull request #828 from minbang930/update/config-api-type-description
update api_type field description to include Ollama
2025-03-19 13:02:49 +08:00
minbang930
1279d77cca update api_type field description to include Ollama
Clarify the description of the api_type field in LLMSettings to accurately
reflect all supported types including Azure, OpenAI, and Ollama.
This makes the documentation consistent with the example configuration.
2025-03-19 10:52:58 +09:00
a-holm
855caad4d9 Merge branch 'fix-for-search-rate-limits' of https://github.com/a-holm/OpenManus into fix-for-search-rate-limits 2025-03-18 21:53:34 +01:00
a-holm
95e3487402 Merge branch 'main' of https://github.com/a-holm/OpenManus into fix-for-search-rate-limits 2025-03-18 21:47:16 +01:00
Johan A. Holm
fe44fe726d
Merge branch 'main' into fix-for-search-rate-limits 2025-03-18 20:51:45 +01:00
Sheng Fan
2fad2904d7 ci(pr-autodiff): add Chinese explicit shot 2025-03-18 21:10:43 +08:00
Sheng Fan
0654d36e40 ci: update Markdown issue templates to forms 2025-03-18 17:23:09 +08:00
Johan Holm
c7858c2eb4 Make sure to only include fallback search engines 2025-03-18 09:53:30 +01:00
Sheng Fan
3d5b09222e Merge branch 'main' of https://github.com/mannaandpoem/OpenManus 2025-03-18 16:28:40 +08:00
Sheng Fan
cf7d6c1207 chore(app): Update error logging to use exception details 2025-03-18 13:36:15 +08:00
Sheng Fan
ca612699ec refactor(app): explicitly specify LLM request parameters to allow typing 2025-03-18 11:53:47 +08:00
Sheng Fan
aa512fac6e refactor(app): Complete exception logging in LLM.ask 2025-03-18 11:46:35 +08:00
Johan Holm
9fa12e594c update from pre-commit 2025-03-17 11:05:03 +01:00
Johan Holm
711c2805e4 feat(search): Add robust fallback system with configurable retries and enhanced error handling
- Implement multi-engine failover system with configurable fallback order
    - Add retry logic with exponential backoff and rate limit detection
    - Introduce search configuration options:
      * fallback_engines: Ordered list of backup search providers
      * retry_delay: Seconds between retry batches (default: 60)
      * max_retries: Maximum system-wide retry attempts (default: 3)
    - Improve error resilience with:
      - Automatic engine switching on 429/Too Many Requests
      - Full system retries after configurable cooldown periods
      - Detailed logging for diagnostics and monitoring
    - Enhance engine prioritization logic:
      1. Primary configured engine
      2. Configured fallback engines
      3. Remaining available engines

    Example configuration:
    [search]
    engine = "Google"
    fallback_engines = ["DuckDuckGo", "Baidu"]  # Cascading fallback order
    retry_delay = 60                            # 1 minute between retry batches
    max_retries = 3                             # Attempt 3 full system retries

    This addresses critical reliability issues by:
    - Preventing search failures due to single-engine rate limits
    - Enabling recovery from transient network errors
    - Providing operational flexibility through configurable parameters
    - Improving visibility through granular logging (INFO/WARN/ERROR)
2025-03-17 10:43:42 +01:00
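For readers who want the shape of the failover loop this commit message describes, here is a minimal sketch. The `engine.search()` coroutine, the engine objects, and the logging are illustrative assumptions, not the PR's actual `WebSearch` implementation, which lives in the search tool changes included in this compare.

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def search_with_fallback(query, engines, max_retries=3, retry_delay=60):
    """Try each engine in order; if all fail, wait and retry the whole list."""
    for attempt in range(1, max_retries + 1):
        for engine in engines:
            try:
                return await engine.search(query)
            except Exception as exc:  # e.g. HTTP 429 / rate limiting
                logger.warning("%s failed (%s); trying next engine", engine, exc)
        logger.warning(
            "All engines failed (attempt %d/%d); retrying in %ds",
            attempt, max_retries, retry_delay,
        )
        await asyncio.sleep(retry_delay)
    raise RuntimeError("Search failed after all retries")
```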
34 changed files with 1475 additions and 207 deletions

View File

@@ -1,14 +0,0 @@
---
name: "🤔 Request new features"
about: Suggest ideas or features you'd like to see implemented in OpenManus.
title: ''
labels: kind/features
assignees: ''
---
**Feature description**
<!-- Provide a clear and concise description of the proposed feature -->
**Your Feature**
<!-- Explain your idea or implementation process. Optionally, include a Pull Request URL. -->
<!-- Ensure accompanying docs/tests/examples are provided for review. -->

View File

@@ -0,0 +1,21 @@
name: "🤔 Request new features"
description: Suggest ideas or features you'd like to see implemented in OpenManus.
labels: enhancement
body:
  - type: textarea
    id: feature-description
    attributes:
      label: Feature description
      description: |
        Provide a clear and concise description of the proposed feature
    validations:
      required: true
  - type: textarea
    id: your-feature
    attributes:
      label: Your Feature
      description: |
        Explain your idea or implementation process, if any. Optionally, include a Pull Request URL.
        Ensure accompanying docs/tests/examples are provided for review.
    validations:
      required: false

View File

@@ -1,25 +0,0 @@
---
name: "🪲 Show me the Bug"
about: Report a bug encountered while using OpenManus and seek assistance.
title: ''
labels: kind/bug
assignees: ''
---
**Bug description**
<!-- Clearly describe the bug you encountered -->
**Bug solved method**
<!-- If resolved, explain the solution. Optionally, include a Pull Request URL. -->
<!-- If unresolved, provide additional details to aid investigation -->
**Environment information**
<!-- System: e.g., Ubuntu 22.04, Python: e.g., 3.12, OpenManus version: e.g., 0.1.0 -->
- System version:
- Python version:
- OpenManus version or branch:
- Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`):
**Screenshots or logs**
<!-- Attach screenshots or logs to help diagnose the issue -->

View File

@@ -0,0 +1,44 @@
name: "🪲 Show me the Bug"
description: Report a bug encountered while using OpenManus and seek assistance.
labels: bug
body:
  - type: textarea
    id: bug-description
    attributes:
      label: Bug Description
      description: |
        Clearly describe the bug you encountered
    validations:
      required: true
  - type: textarea
    id: solve-method
    attributes:
      label: Bug solved method
      description: |
        If resolved, explain the solution. Optionally, include a Pull Request URL.
        If unresolved, provide additional details to aid investigation
    validations:
      required: true
  - type: textarea
    id: environment-information
    attributes:
      label: Environment information
      description: |
        System: e.g., Ubuntu 22.04
        Python: e.g., 3.12
        OpenManus version: e.g., 0.1.0
      value: |
        - System version:
        - Python version:
        - OpenManus version or branch:
        - Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`):
    validations:
      required: true
  - type: textarea
    id: extra-information
    attributes:
      label: Extra information
      description: |
        For example, attach screenshots or logs to help diagnose the issue
    validations:
      required: false

View File

@@ -15,21 +15,20 @@ jobs:
(github.event_name == 'pull_request') ||
(github.event_name == 'issue_comment' &&
contains(github.event.comment.body, '!pr-diff') &&
-(github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') &&
+(github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') &&
github.event.issue.pull_request)
steps:
- name: Get PR head SHA
id: get-pr-sha
run: |
-if [ "${{ github.event_name }}" == "pull_request" ]; then
-echo "pr_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT
-echo "Retrieved PR head SHA: ${{ github.event.pull_request.head.sha }}"
-else
-PR_URL="${{ github.event.issue.pull_request.url }}"
-SHA=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL | jq -r '.head.sha')
-echo "pr_sha=$SHA" >> $GITHUB_OUTPUT
-echo "Retrieved PR head SHA from API: $SHA"
-fi
+PR_URL="${{ github.event.issue.pull_request.url || github.event.pull_request.url }}"
+# https://api.github.com/repos/OpenManus/pulls/1
+RESPONSE=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL)
+SHA=$(echo $RESPONSE | jq -r '.head.sha')
+TARGET_BRANCH=$(echo $RESPONSE | jq -r '.base.ref')
+echo "pr_sha=$SHA" >> $GITHUB_OUTPUT
+echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT
+echo "Retrieved PR head SHA from API: $SHA, target branch: $TARGET_BRANCH"
- name: Check out code
uses: actions/checkout@v4
with:
@@ -49,6 +48,7 @@ jobs:
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
GH_TOKEN: ${{ github.token }}
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
+TARGET_BRANCH: ${{ steps.get-pr-sha.outputs.target_branch }}
run: |-
cat << 'EOF' > /tmp/_workflow_core.py
import os
@@ -59,7 +59,7 @@ jobs:
def get_diff():
result = subprocess.run(
-['git', 'diff', 'origin/main...HEAD'],
+['git', 'diff', 'origin/' + os.getenv('TARGET_BRANCH') + '...HEAD'],
capture_output=True, text=True, check=True)
return '\n'.join(
line for line in result.stdout.split('\n')
@@ -86,6 +86,17 @@ jobs:
### Spelling/Offensive Content Check
- No spelling mistakes or offensive content found in the code or comments.
+## 中文(简体)
+- 新增了 `ABC` 类
+- `foo` 模块中的 `f()` 行为已修复
+### 评论高亮
+- `config.toml` 需要正确配置才能确保新功能正常运行。
+### 内容检查
+- 没有发现代码或注释中的拼写错误或不当措辞。
3. Highlight non-English comments
4. Check for spelling/offensive content'''

View File

@@ -81,6 +81,11 @@ source .venv/bin/activate # On Unix/macOS
uv pip install -r requirements.txt
```
+### Browser Automation Tool (Optional)
+```bash
+playwright install
+```
## Configuration
OpenManus requires configuration for the LLM APIs it uses. Follow these steps to set up your configuration:
@@ -119,7 +124,12 @@ python main.py
Then input your idea via terminal!
-For unstable version, you also can run:
+For MCP tool version, you can run:
+```bash
+python run_mcp.py
+```
+For unstable multi-agent version, you also can run:
```bash
python run_flow.py

View File

@@ -81,6 +81,11 @@ source .venv/bin/activate # Unix/macOSの場合
uv pip install -r requirements.txt
```
+### ブラウザ自動化ツール(オプション)
+```bash
+playwright install
+```
## 設定
OpenManusを使用するには、LLM APIの設定が必要です。以下の手順に従って設定してください
@@ -119,7 +124,12 @@ python main.py
その後、ターミナルからプロンプトを入力してください!
-開発中バージョンを試すには、以下を実行します:
+MCP ツールバージョンを使用する場合は、以下を実行します:
+```bash
+python run_mcp.py
+```
+開発中のマルチエージェントバージョンを試すには、以下を実行します:
```bash
python run_flow.py

View File

@@ -81,6 +81,11 @@ source .venv/bin/activate # Unix/macOS의 경우
uv pip install -r requirements.txt
```
+### 브라우저 자동화 도구 (선택사항)
+```bash
+playwright install
+```
## 설정 방법
OpenManus를 사용하려면 사용하는 LLM API에 대한 설정이 필요합니다. 아래 단계를 따라 설정을 완료하세요:
@@ -119,7 +124,12 @@ python main.py
이후 터미널에서 아이디어를 작성하세요!
-unstable 버전을 실행하려면 아래 명령어를 사용할 수도 있습니다:
+MCP 도구 버전을 사용하려면 다음을 실행하세요:
+```bash
+python run_mcp.py
+```
+불안정한 멀티 에이전트 버전을 실행하려면 다음을 실행할 수 있습니다:
```bash
python run_flow.py

View File

@@ -82,6 +82,11 @@ source .venv/bin/activate # Unix/macOS 系统
uv pip install -r requirements.txt
```
+### 浏览器自动化工具(可选)
+```bash
+playwright install
+```
## 配置说明
OpenManus 需要配置使用的 LLM API请按以下步骤设置
@@ -120,7 +125,12 @@ python main.py
然后通过终端输入你的创意!
-如需体验不稳定的开发版本,可运行:
+如需使用 MCP 工具版本,可运行:
+```bash
+python run_mcp.py
+```
+如需体验不稳定的多智能体版本,可运行:
```bash
python run_flow.py

View File

@@ -1,5 +1,6 @@
from app.agent.base import BaseAgent
from app.agent.browser import BrowserAgent
+from app.agent.mcp import MCPAgent
from app.agent.planning import PlanningAgent
from app.agent.react import ReActAgent
from app.agent.swe import SWEAgent
@@ -13,4 +14,5 @@ __all__ = [
"ReActAgent",
"SWEAgent",
"ToolCallAgent",
+"MCPAgent",
]

app/agent/mcp.py (new file, 185 lines)
View File

@@ -0,0 +1,185 @@
from typing import Any, Dict, List, Optional, Tuple
from pydantic import Field
from app.agent.toolcall import ToolCallAgent
from app.logger import logger
from app.prompt.mcp import MULTIMEDIA_RESPONSE_PROMPT, NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.schema import AgentState, Message
from app.tool.base import ToolResult
from app.tool.mcp import MCPClients
class MCPAgent(ToolCallAgent):
"""Agent for interacting with MCP (Model Context Protocol) servers.
This agent connects to an MCP server using either SSE or stdio transport
and makes the server's tools available through the agent's tool interface.
"""
name: str = "mcp_agent"
description: str = "An agent that connects to an MCP server and uses its tools."
system_prompt: str = SYSTEM_PROMPT
next_step_prompt: str = NEXT_STEP_PROMPT
# Initialize MCP tool collection
mcp_clients: MCPClients = Field(default_factory=MCPClients)
available_tools: MCPClients = None # Will be set in initialize()
max_steps: int = 20
connection_type: str = "stdio" # "stdio" or "sse"
# Track tool schemas to detect changes
tool_schemas: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
_refresh_tools_interval: int = 5 # Refresh tools every N steps
# Special tool names that should trigger termination
special_tool_names: List[str] = Field(default_factory=lambda: ["terminate"])
async def initialize(
self,
connection_type: Optional[str] = None,
server_url: Optional[str] = None,
command: Optional[str] = None,
args: Optional[List[str]] = None,
) -> None:
"""Initialize the MCP connection.
Args:
connection_type: Type of connection to use ("stdio" or "sse")
server_url: URL of the MCP server (for SSE connection)
command: Command to run (for stdio connection)
args: Arguments for the command (for stdio connection)
"""
if connection_type:
self.connection_type = connection_type
# Connect to the MCP server based on connection type
if self.connection_type == "sse":
if not server_url:
raise ValueError("Server URL is required for SSE connection")
await self.mcp_clients.connect_sse(server_url=server_url)
elif self.connection_type == "stdio":
if not command:
raise ValueError("Command is required for stdio connection")
await self.mcp_clients.connect_stdio(command=command, args=args or [])
else:
raise ValueError(f"Unsupported connection type: {self.connection_type}")
# Set available_tools to our MCP instance
self.available_tools = self.mcp_clients
# Store initial tool schemas
await self._refresh_tools()
# Add system message about available tools
tool_names = list(self.mcp_clients.tool_map.keys())
tools_info = ", ".join(tool_names)
# Add system prompt and available tools information
self.memory.add_message(
Message.system_message(
f"{self.system_prompt}\n\nAvailable MCP tools: {tools_info}"
)
)
async def _refresh_tools(self) -> Tuple[List[str], List[str]]:
"""Refresh the list of available tools from the MCP server.
Returns:
A tuple of (added_tools, removed_tools)
"""
if not self.mcp_clients.session:
return [], []
# Get current tool schemas directly from the server
response = await self.mcp_clients.session.list_tools()
current_tools = {tool.name: tool.inputSchema for tool in response.tools}
# Determine added, removed, and changed tools
current_names = set(current_tools.keys())
previous_names = set(self.tool_schemas.keys())
added_tools = list(current_names - previous_names)
removed_tools = list(previous_names - current_names)
# Check for schema changes in existing tools
changed_tools = []
for name in current_names.intersection(previous_names):
if current_tools[name] != self.tool_schemas.get(name):
changed_tools.append(name)
# Update stored schemas
self.tool_schemas = current_tools
# Log and notify about changes
if added_tools:
logger.info(f"Added MCP tools: {added_tools}")
self.memory.add_message(
Message.system_message(f"New tools available: {', '.join(added_tools)}")
)
if removed_tools:
logger.info(f"Removed MCP tools: {removed_tools}")
self.memory.add_message(
Message.system_message(
f"Tools no longer available: {', '.join(removed_tools)}"
)
)
if changed_tools:
logger.info(f"Changed MCP tools: {changed_tools}")
return added_tools, removed_tools
async def think(self) -> bool:
"""Process current state and decide next action."""
# Check MCP session and tools availability
if not self.mcp_clients.session or not self.mcp_clients.tool_map:
logger.info("MCP service is no longer available, ending interaction")
self.state = AgentState.FINISHED
return False
# Refresh tools periodically
if self.current_step % self._refresh_tools_interval == 0:
await self._refresh_tools()
# All tools removed indicates shutdown
if not self.mcp_clients.tool_map:
logger.info("MCP service has shut down, ending interaction")
self.state = AgentState.FINISHED
return False
# Use the parent class's think method
return await super().think()
async def _handle_special_tool(self, name: str, result: Any, **kwargs) -> None:
"""Handle special tool execution and state changes"""
# First process with parent handler
await super()._handle_special_tool(name, result, **kwargs)
# Handle multimedia responses
if isinstance(result, ToolResult) and result.base64_image:
self.memory.add_message(
Message.system_message(
MULTIMEDIA_RESPONSE_PROMPT.format(tool_name=name)
)
)
def _should_finish_execution(self, name: str, **kwargs) -> bool:
"""Determine if tool execution should finish the agent"""
# Terminate if the tool name is 'terminate'
return name.lower() == "terminate"
async def cleanup(self) -> None:
"""Clean up MCP connection when done."""
if self.mcp_clients.session:
await self.mcp_clients.disconnect()
logger.info("MCP connection closed")
async def run(self, request: Optional[str] = None) -> str:
"""Run the agent with cleanup when done."""
try:
result = await super().run(request)
return result
finally:
# Ensure cleanup happens even if there's an error
await self.cleanup()
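Taken together with the `initialize()` and `run()` signatures above, a usage sketch might look like the following. Spawning the server with `python -m app.mcp.server` and the sample request are illustrative assumptions; the repository's `run_mcp.py`, referenced in the README changes in this compare, is the supported entry point.

```python
import asyncio

from app.agent.mcp import MCPAgent


async def main() -> None:
    agent = MCPAgent()
    # stdio transport: launch the bundled MCP server as a subprocess
    # (command/args are assumptions; an SSE setup would pass server_url instead).
    await agent.initialize(
        connection_type="stdio",
        command="python",
        args=["-m", "app.mcp.server"],
    )
    print(await agent.run("List the tools you have available"))


if __name__ == "__main__":
    asyncio.run(main())
```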

View File

@@ -29,7 +29,8 @@ class SWEAgent(ToolCallAgent):
async def think(self) -> bool:
"""Process current state and decide next action"""
# Update working directory
-self.working_dir = await self.bash.execute("pwd")
+result = await self.bash.execute("pwd")
+self.working_dir = result.output
self.next_step_prompt = self.next_step_prompt.format(
current_dir=self.working_dir
)

View File

@@ -71,40 +71,42 @@ class ToolCallAgent(ReActAgent):
return False
raise
-self.tool_calls = response.tool_calls
+self.tool_calls = tool_calls = (
+response.tool_calls if response and response.tool_calls else []
+)
+content = response.content if response and response.content else ""
# Log response info
-logger.info(f"{self.name}'s thoughts: {response.content}")
+logger.info(f"{self.name}'s thoughts: {content}")
logger.info(
-f"🛠️ {self.name} selected {len(response.tool_calls) if response.tool_calls else 0} tools to use"
+f"🛠️ {self.name} selected {len(tool_calls) if tool_calls else 0} tools to use"
)
-if response.tool_calls:
+if tool_calls:
logger.info(
-f"🧰 Tools being prepared: {[call.function.name for call in response.tool_calls]}"
+f"🧰 Tools being prepared: {[call.function.name for call in tool_calls]}"
)
-logger.info(
-f"🔧 Tool arguments: {response.tool_calls[0].function.arguments}"
-)
+logger.info(f"🔧 Tool arguments: {tool_calls[0].function.arguments}")
try:
+if response is None:
+raise RuntimeError("No response received from the LLM")
# Handle different tool_choices modes
if self.tool_choices == ToolChoice.NONE:
-if response.tool_calls:
+if tool_calls:
logger.warning(
f"🤔 Hmm, {self.name} tried to use tools when they weren't available!"
)
-if response.content:
-self.memory.add_message(Message.assistant_message(response.content))
+if content:
+self.memory.add_message(Message.assistant_message(content))
return True
return False
# Create and add assistant message
assistant_msg = (
-Message.from_tool_calls(
-content=response.content, tool_calls=self.tool_calls
-)
+Message.from_tool_calls(content=content, tool_calls=self.tool_calls)
if self.tool_calls
-else Message.assistant_message(response.content)
+else Message.assistant_message(content)
)
self.memory.add_message(assistant_msg)
@@ -113,7 +115,7 @@ class ToolCallAgent(ReActAgent):
# For 'auto' mode, continue with content if no commands but content exists
if self.tool_choices == ToolChoice.AUTO and not self.tool_calls:
-return bool(response.content)
+return bool(content)
return bool(self.tool_calls)
except Exception as e:
@@ -209,7 +211,7 @@ class ToolCallAgent(ReActAgent):
return f"Error: {error_msg}"
except Exception as e:
error_msg = f"⚠️ Tool '{name}' encountered a problem: {str(e)}"
-logger.error(error_msg)
+logger.exception(error_msg)
return f"Error: {error_msg}"
async def _handle_special_tool(self, name: str, result: Any, **kwargs):

app/bedrock.py (new file, 334 lines)
View File

@@ -0,0 +1,334 @@
import json
import sys
import time
import uuid
from datetime import datetime
from typing import Dict, List, Literal, Optional
import boto3
# Global variables to track the current tool use ID across function calls
# Tmp solution
CURRENT_TOOLUSE_ID = None
# Class to handle OpenAI-style response formatting
class OpenAIResponse:
def __init__(self, data):
# Recursively convert nested dicts and lists to OpenAIResponse objects
for key, value in data.items():
if isinstance(value, dict):
value = OpenAIResponse(value)
elif isinstance(value, list):
value = [
OpenAIResponse(item) if isinstance(item, dict) else item
for item in value
]
setattr(self, key, value)
def model_dump(self, *args, **kwargs):
# Convert object to dict and add timestamp
data = self.__dict__
data["created_at"] = datetime.now().isoformat()
return data
# Main client class for interacting with Amazon Bedrock
class BedrockClient:
def __init__(self):
# Initialize Bedrock client, you need to configure AWS env first
try:
self.client = boto3.client("bedrock-runtime")
self.chat = Chat(self.client)
except Exception as e:
print(f"Error initializing Bedrock client: {e}")
sys.exit(1)
# Chat interface class
class Chat:
def __init__(self, client):
self.completions = ChatCompletions(client)
# Core class handling chat completions functionality
class ChatCompletions:
def __init__(self, client):
self.client = client
def _convert_openai_tools_to_bedrock_format(self, tools):
# Convert OpenAI function calling format to Bedrock tool format
bedrock_tools = []
for tool in tools:
if tool.get("type") == "function":
function = tool.get("function", {})
bedrock_tool = {
"toolSpec": {
"name": function.get("name", ""),
"description": function.get("description", ""),
"inputSchema": {
"json": {
"type": "object",
"properties": function.get("parameters", {}).get(
"properties", {}
),
"required": function.get("parameters", {}).get(
"required", []
),
}
},
}
}
bedrock_tools.append(bedrock_tool)
return bedrock_tools
def _convert_openai_messages_to_bedrock_format(self, messages):
# Convert OpenAI message format to Bedrock message format
bedrock_messages = []
system_prompt = []
for message in messages:
if message.get("role") == "system":
system_prompt = [{"text": message.get("content")}]
elif message.get("role") == "user":
bedrock_message = {
"role": message.get("role", "user"),
"content": [{"text": message.get("content")}],
}
bedrock_messages.append(bedrock_message)
elif message.get("role") == "assistant":
bedrock_message = {
"role": "assistant",
"content": [{"text": message.get("content")}],
}
openai_tool_calls = message.get("tool_calls", [])
if openai_tool_calls:
bedrock_tool_use = {
"toolUseId": openai_tool_calls[0]["id"],
"name": openai_tool_calls[0]["function"]["name"],
"input": json.loads(
openai_tool_calls[0]["function"]["arguments"]
),
}
bedrock_message["content"].append({"toolUse": bedrock_tool_use})
global CURRENT_TOOLUSE_ID
CURRENT_TOOLUSE_ID = openai_tool_calls[0]["id"]
bedrock_messages.append(bedrock_message)
elif message.get("role") == "tool":
bedrock_message = {
"role": "user",
"content": [
{
"toolResult": {
"toolUseId": CURRENT_TOOLUSE_ID,
"content": [{"text": message.get("content")}],
}
}
],
}
bedrock_messages.append(bedrock_message)
else:
raise ValueError(f"Invalid role: {message.get('role')}")
return system_prompt, bedrock_messages
def _convert_bedrock_response_to_openai_format(self, bedrock_response):
# Convert Bedrock response format to OpenAI format
content = ""
if bedrock_response.get("output", {}).get("message", {}).get("content"):
content_array = bedrock_response["output"]["message"]["content"]
content = "".join(item.get("text", "") for item in content_array)
if content == "":
content = "."
# Handle tool calls in response
openai_tool_calls = []
if bedrock_response.get("output", {}).get("message", {}).get("content"):
for content_item in bedrock_response["output"]["message"]["content"]:
if content_item.get("toolUse"):
bedrock_tool_use = content_item["toolUse"]
global CURRENT_TOOLUSE_ID
CURRENT_TOOLUSE_ID = bedrock_tool_use["toolUseId"]
openai_tool_call = {
"id": CURRENT_TOOLUSE_ID,
"type": "function",
"function": {
"name": bedrock_tool_use["name"],
"arguments": json.dumps(bedrock_tool_use["input"]),
},
}
openai_tool_calls.append(openai_tool_call)
# Construct final OpenAI format response
openai_format = {
"id": f"chatcmpl-{uuid.uuid4()}",
"created": int(time.time()),
"object": "chat.completion",
"system_fingerprint": None,
"choices": [
{
"finish_reason": bedrock_response.get("stopReason", "end_turn"),
"index": 0,
"message": {
"content": content,
"role": bedrock_response.get("output", {})
.get("message", {})
.get("role", "assistant"),
"tool_calls": openai_tool_calls
if openai_tool_calls != []
else None,
"function_call": None,
},
}
],
"usage": {
"completion_tokens": bedrock_response.get("usage", {}).get(
"outputTokens", 0
),
"prompt_tokens": bedrock_response.get("usage", {}).get(
"inputTokens", 0
),
"total_tokens": bedrock_response.get("usage", {}).get("totalTokens", 0),
},
}
return OpenAIResponse(openai_format)
async def _invoke_bedrock(
self,
model: str,
messages: List[Dict[str, str]],
max_tokens: int,
temperature: float,
tools: Optional[List[dict]] = None,
tool_choice: Literal["none", "auto", "required"] = "auto",
**kwargs,
) -> OpenAIResponse:
# Non-streaming invocation of Bedrock model
(
system_prompt,
bedrock_messages,
) = self._convert_openai_messages_to_bedrock_format(messages)
response = self.client.converse(
modelId=model,
system=system_prompt,
messages=bedrock_messages,
inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
toolConfig={"tools": tools} if tools else None,
)
openai_response = self._convert_bedrock_response_to_openai_format(response)
return openai_response
async def _invoke_bedrock_stream(
self,
model: str,
messages: List[Dict[str, str]],
max_tokens: int,
temperature: float,
tools: Optional[List[dict]] = None,
tool_choice: Literal["none", "auto", "required"] = "auto",
**kwargs,
) -> OpenAIResponse:
# Streaming invocation of Bedrock model
(
system_prompt,
bedrock_messages,
) = self._convert_openai_messages_to_bedrock_format(messages)
response = self.client.converse_stream(
modelId=model,
system=system_prompt,
messages=bedrock_messages,
inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
toolConfig={"tools": tools} if tools else None,
)
# Initialize response structure
bedrock_response = {
"output": {"message": {"role": "", "content": []}},
"stopReason": "",
"usage": {},
"metrics": {},
}
bedrock_response_text = ""
bedrock_response_tool_input = ""
# Process streaming response
stream = response.get("stream")
if stream:
for event in stream:
if event.get("messageStart", {}).get("role"):
bedrock_response["output"]["message"]["role"] = event[
"messageStart"
]["role"]
if event.get("contentBlockDelta", {}).get("delta", {}).get("text"):
bedrock_response_text += event["contentBlockDelta"]["delta"]["text"]
print(
event["contentBlockDelta"]["delta"]["text"], end="", flush=True
)
if event.get("contentBlockStop", {}).get("contentBlockIndex") == 0:
bedrock_response["output"]["message"]["content"].append(
{"text": bedrock_response_text}
)
if event.get("contentBlockStart", {}).get("start", {}).get("toolUse"):
bedrock_tool_use = event["contentBlockStart"]["start"]["toolUse"]
tool_use = {
"toolUseId": bedrock_tool_use["toolUseId"],
"name": bedrock_tool_use["name"],
}
bedrock_response["output"]["message"]["content"].append(
{"toolUse": tool_use}
)
global CURRENT_TOOLUSE_ID
CURRENT_TOOLUSE_ID = bedrock_tool_use["toolUseId"]
if event.get("contentBlockDelta", {}).get("delta", {}).get("toolUse"):
bedrock_response_tool_input += event["contentBlockDelta"]["delta"][
"toolUse"
]["input"]
print(
event["contentBlockDelta"]["delta"]["toolUse"]["input"],
end="",
flush=True,
)
if event.get("contentBlockStop", {}).get("contentBlockIndex") == 1:
bedrock_response["output"]["message"]["content"][1]["toolUse"][
"input"
] = json.loads(bedrock_response_tool_input)
print()
openai_response = self._convert_bedrock_response_to_openai_format(
bedrock_response
)
return openai_response
def create(
self,
model: str,
messages: List[Dict[str, str]],
max_tokens: int,
temperature: float,
stream: Optional[bool] = True,
tools: Optional[List[dict]] = None,
tool_choice: Literal["none", "auto", "required"] = "auto",
**kwargs,
) -> OpenAIResponse:
# Main entry point for chat completion
bedrock_tools = []
if tools is not None:
bedrock_tools = self._convert_openai_tools_to_bedrock_format(tools)
if stream:
return self._invoke_bedrock_stream(
model,
messages,
max_tokens,
temperature,
bedrock_tools,
tool_choice,
**kwargs,
)
else:
return self._invoke_bedrock(
model,
messages,
max_tokens,
temperature,
bedrock_tools,
tool_choice,
**kwargs,
)
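The class mirrors the OpenAI client surface that `app.llm` already calls (`client.chat.completions.create(...)`), which is what keeps the integration non-intrusive; `app.llm` simply instantiates `BedrockClient` when `api_type` is `"aws"` (see the llm.py diff below). As a quick, credential-free look at the format translation, this sketch calls the message converter directly; the sample conversation is an assumption.

```python
from app.bedrock import ChatCompletions

# The converter methods never touch the boto3 client, so None is fine here.
completions = ChatCompletions(client=None)
system, messages = completions._convert_openai_messages_to_bedrock_format(
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is 2 + 2?"},
    ]
)
print(system)    # [{'text': 'You are a helpful assistant.'}]
print(messages)  # [{'role': 'user', 'content': [{'text': 'What is 2 + 2?'}]}]
```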

View File

@@ -25,7 +25,7 @@ class LLMSettings(BaseModel):
description="Maximum input tokens to use across all requests (None for unlimited)",
)
temperature: float = Field(1.0, description="Sampling temperature")
-api_type: str = Field(..., description="AzureOpenai or Openai")
+api_type: str = Field(..., description="Azure, Openai, or Ollama")
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
@@ -37,6 +37,18 @@ class ProxySettings(BaseModel):
class SearchSettings(BaseModel):
engine: str = Field(default="Google", description="Search engine the llm to use")
+fallback_engines: List[str] = Field(
+default_factory=lambda: ["DuckDuckGo", "Baidu"],
+description="Fallback search engines to try if the primary engine fails",
+)
+retry_delay: int = Field(
+default=60,
+description="Seconds to wait before retrying all engines again after they all fail",
+)
+max_retries: int = Field(
+default=3,
+description="Maximum number of times to retry all engines when all fail",
+)
class BrowserSettings(BaseModel):
@@ -227,5 +239,10 @@ class Config:
"""Get the workspace root directory"""
return WORKSPACE_ROOT
+@property
+def root_path(self) -> Path:
+"""Get the root path of the application"""
+return PROJECT_ROOT
config = Config()
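A small hypothetical snippet showing the new search fields in use; the printed values simply restate the defaults from the diff above.

```python
from app.config import SearchSettings

settings = SearchSettings()  # engine="Google" plus the new fallback fields
print(settings.fallback_engines)              # ['DuckDuckGo', 'Baidu']
print(settings.retry_delay, settings.max_retries)  # 60 3
```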

View File

@@ -1,5 +1,4 @@
from abc import ABC, abstractmethod
-from enum import Enum
from typing import Dict, List, Optional, Union
from pydantic import BaseModel
@@ -7,10 +6,6 @@ from pydantic import BaseModel
from app.agent.base import BaseAgent
-class FlowType(str, Enum):
-PLANNING = "planning"
class BaseFlow(BaseModel, ABC):
"""Base class for execution flows supporting multiple agents"""
@@ -60,32 +55,3 @@ class BaseFlow(BaseModel, ABC):
@abstractmethod
async def execute(self, input_text: str) -> str:
"""Execute the flow with given input"""
-class PlanStepStatus(str, Enum):
-"""Enum class defining possible statuses of a plan step"""
-NOT_STARTED = "not_started"
-IN_PROGRESS = "in_progress"
-COMPLETED = "completed"
-BLOCKED = "blocked"
-@classmethod
-def get_all_statuses(cls) -> list[str]:
-"""Return a list of all possible step status values"""
-return [status.value for status in cls]
-@classmethod
-def get_active_statuses(cls) -> list[str]:
-"""Return a list of values representing active statuses (not started or in progress)"""
-return [cls.NOT_STARTED.value, cls.IN_PROGRESS.value]
-@classmethod
-def get_status_marks(cls) -> Dict[str, str]:
-"""Return a mapping of statuses to their marker symbols"""
-return {
-cls.COMPLETED.value: "[✓]",
-cls.IN_PROGRESS.value: "[→]",
-cls.BLOCKED.value: "[!]",
-cls.NOT_STARTED.value: "[ ]",
-}

View File

@@ -1,10 +1,15 @@
+from enum import Enum
from typing import Dict, List, Union
from app.agent.base import BaseAgent
-from app.flow.base import BaseFlow, FlowType
+from app.flow.base import BaseFlow
from app.flow.planning import PlanningFlow
+class FlowType(str, Enum):
+PLANNING = "planning"
class FlowFactory:
"""Factory for creating different types of flows with support for multiple agents"""

View File

@@ -1,17 +1,47 @@
import json
import time
+from enum import Enum
from typing import Dict, List, Optional, Union
from pydantic import Field
from app.agent.base import BaseAgent
-from app.flow.base import BaseFlow, PlanStepStatus
+from app.flow.base import BaseFlow
from app.llm import LLM
from app.logger import logger
from app.schema import AgentState, Message, ToolChoice
from app.tool import PlanningTool
+class PlanStepStatus(str, Enum):
+"""Enum class defining possible statuses of a plan step"""
+NOT_STARTED = "not_started"
+IN_PROGRESS = "in_progress"
+COMPLETED = "completed"
+BLOCKED = "blocked"
+@classmethod
+def get_all_statuses(cls) -> list[str]:
+"""Return a list of all possible step status values"""
+return [status.value for status in cls]
+@classmethod
+def get_active_statuses(cls) -> list[str]:
+"""Return a list of values representing active statuses (not started or in progress)"""
+return [cls.NOT_STARTED.value, cls.IN_PROGRESS.value]
+@classmethod
+def get_status_marks(cls) -> Dict[str, str]:
+"""Return a mapping of statuses to their marker symbols"""
+return {
+cls.COMPLETED.value: "[✓]",
+cls.IN_PROGRESS.value: "[→]",
+cls.BLOCKED.value: "[!]",
+cls.NOT_STARTED.value: "[ ]",
+}
class PlanningFlow(BaseFlow):
"""A flow that manages planning and execution of tasks using agents."""

View File

@@ -10,6 +10,7 @@ from openai import (
OpenAIError,
RateLimitError,
)
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
from tenacity import (
retry,
retry_if_exception_type,
@@ -17,6 +18,7 @@ from tenacity import (
wait_random_exponential,
)
+from app.bedrock import BedrockClient
from app.config import LLMSettings, config
from app.exceptions import TokenLimitExceeded
from app.logger import logger # Assuming a logger is set up in your app
@@ -30,6 +32,14 @@ from app.schema import (
REASONING_MODELS = ["o1", "o3-mini"]
+MULTIMODAL_MODELS = [
+"gpt-4-vision-preview",
+"gpt-4o",
+"gpt-4o-mini",
+"claude-3-opus-20240229",
+"claude-3-sonnet-20240229",
+"claude-3-haiku-20240307",
+]
class TokenCounter:
@@ -216,6 +226,8 @@ class LLM:
api_key=self.api_key,
api_version=self.api_version,
)
+elif self.api_type == "aws":
+self.client = BedrockClient()
else:
self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)
@@ -259,12 +271,15 @@ class LLM:
return "Token limit exceeded"
@staticmethod
-def format_messages(messages: List[Union[dict, Message]]) -> List[dict]:
+def format_messages(
+messages: List[Union[dict, Message]], supports_images: bool = False
+) -> List[dict]:
"""
Format messages for LLM by converting them to OpenAI message format.
Args:
messages: List of messages that can be either dict or Message objects
+supports_images: Flag indicating if the target model supports image inputs
Returns:
List[dict]: List of formatted messages in OpenAI format
@@ -288,54 +303,58 @@ class LLM:
if isinstance(message, Message):
message = message.to_dict()
-if not isinstance(message, dict):
+if isinstance(message, dict):
+# If message is a dict, ensure it has required fields
+if "role" not in message:
+raise ValueError("Message dict must contain 'role' field")
+# Process base64 images if present and model supports images
+if supports_images and message.get("base64_image"):
+# Initialize or convert content to appropriate format
+if not message.get("content"):
+message["content"] = []
+elif isinstance(message["content"], str):
+message["content"] = [
+{"type": "text", "text": message["content"]}
+]
+elif isinstance(message["content"], list):
+# Convert string items to proper text objects
+message["content"] = [
+(
+{"type": "text", "text": item}
+if isinstance(item, str)
+else item
+)
+for item in message["content"]
+]
+# Add the image to content
+message["content"].append(
+{
+"type": "image_url",
+"image_url": {
+"url": f"data:image/jpeg;base64,{message['base64_image']}"
+},
+}
+)
+# Remove the base64_image field
+del message["base64_image"]
+# If model doesn't support images but message has base64_image, handle gracefully
+elif not supports_images and message.get("base64_image"):
+# Just remove the base64_image field and keep the text content
+del message["base64_image"]
+if "content" in message or "tool_calls" in message:
+formatted_messages.append(message)
+# else: do not include the message
+else:
raise TypeError(f"Unsupported message type: {type(message)}")
-# Validate required fields
-if "role" not in message:
-raise ValueError("Message dict must contain 'role' field")
-# Process base64 images if present
-if message.get("base64_image"):
-# Initialize or convert content to appropriate format
-if not message.get("content"):
-message["content"] = []
-elif isinstance(message["content"], str):
-message["content"] = [{"type": "text", "text": message["content"]}]
-elif isinstance(message["content"], list):
-# Convert string items to proper text objects
-message["content"] = [
-(
-{"type": "text", "text": item}
-if isinstance(item, str)
-else item
-)
-for item in message["content"]
-]
-# Add the image to content
-message["content"].append(
-{
-"type": "image_url",
-"image_url": {
-"url": f"data:image/jpeg;base64,{message['base64_image']}"
-},
-}
-)
-# Remove the base64_image field
-del message["base64_image"]
-# Only include messages with content or tool_calls
-if "content" in message or "tool_calls" in message:
-formatted_messages.append(message)
-# Validate all roles
-invalid_roles = [
-msg for msg in formatted_messages if msg["role"] not in ROLE_VALUES
-]
-if invalid_roles:
-raise ValueError(f"Invalid role: {invalid_roles[0]['role']}")
+# Validate all messages have required fields
+for msg in formatted_messages:
+if msg["role"] not in ROLE_VALUES:
+raise ValueError(f"Invalid role: {msg['role']}")
return formatted_messages
@@ -372,12 +391,15 @@ class LLM:
Exception: For unexpected errors
"""
try:
-# Format system and user messages
+# Check if the model supports images
+supports_images = self.model in MULTIMODAL_MODELS
+# Format system and user messages with image support check
if system_msgs:
-system_msgs = self.format_messages(system_msgs)
-messages = system_msgs + self.format_messages(messages)
+system_msgs = self.format_messages(system_msgs, supports_images)
+messages = system_msgs + self.format_messages(messages, supports_images)
else:
-messages = self.format_messages(messages)
+messages = self.format_messages(messages, supports_images)
# Calculate input token count
input_tokens = self.count_message_tokens(messages)
@@ -403,9 +425,9 @@ class LLM:
if not stream:
# Non-streaming request
-params["stream"] = False
-response = await self.client.chat.completions.create(**params)
+response = await self.client.chat.completions.create(
+**params, stream=False
+)
if not response.choices or not response.choices[0].message.content:
raise ValueError("Empty or invalid response from LLM")
@@ -420,8 +442,7 @@ class LLM:
# Streaming request, For streaming, update estimated token count before making the request
self.update_token_count(input_tokens)
-params["stream"] = True
-response = await self.client.chat.completions.create(**params)
+response = await self.client.chat.completions.create(**params, stream=True)
collected_messages = []
completion_text = ""
@@ -448,11 +469,11 @@ class LLM:
except TokenLimitExceeded:
# Re-raise token limit errors without logging
raise
-except ValueError as ve:
-logger.error(f"Validation error: {ve}")
+except ValueError:
+logger.exception(f"Validation error")
raise
except OpenAIError as oe:
-logger.error(f"OpenAI API error: {oe}")
+logger.exception(f"OpenAI API error")
if isinstance(oe, AuthenticationError):
logger.error("Authentication failed. Check API key.")
elif isinstance(oe, RateLimitError):
@@ -460,8 +481,8 @@ class LLM:
elif isinstance(oe, APIError):
logger.error(f"API error: {oe}")
raise
-except Exception as e:
-logger.error(f"Unexpected error in ask: {e}")
+except Exception:
+logger.exception(f"Unexpected error in ask")
raise
@retry(
@@ -499,8 +520,15 @@ class LLM:
Exception: For unexpected errors
"""
try:
-# Format messages
-formatted_messages = self.format_messages(messages)
+# For ask_with_images, we always set supports_images to True because
+# this method should only be called with models that support images
+if self.model not in MULTIMODAL_MODELS:
+raise ValueError(
+f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}"
+)
+# Format messages with image support
+formatted_messages = self.format_messages(messages, supports_images=True)
# Ensure the last message is from the user to attach images
if not formatted_messages or formatted_messages[-1]["role"] != "user":
@@ -539,7 +567,10 @@ class LLM:
# Add system messages if provided
if system_msgs:
-all_messages = self.format_messages(system_msgs) + formatted_messages
+all_messages = (
+self.format_messages(system_msgs, supports_images=True)
++ formatted_messages
+)
else:
all_messages = formatted_messages
@@ -626,7 +657,7 @@ class LLM:
tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore
temperature: Optional[float] = None,
**kwargs,
-):
+) -> ChatCompletionMessage | None:
"""
Ask LLM using functions/tools and return the response.
@@ -653,12 +684,15 @@ class LLM:
if tool_choice not in TOOL_CHOICE_VALUES:
raise ValueError(f"Invalid tool_choice: {tool_choice}")
+# Check if the model supports images
+supports_images = self.model in MULTIMODAL_MODELS
# Format messages
if system_msgs:
-system_msgs = self.format_messages(system_msgs)
-messages = system_msgs + self.format_messages(messages)
+system_msgs = self.format_messages(system_msgs, supports_images)
+messages = system_msgs + self.format_messages(messages, supports_images)
else:
-messages = self.format_messages(messages)
+messages = self.format_messages(messages, supports_images)
# Calculate input token count
input_tokens = self.count_message_tokens(messages)
@@ -701,12 +735,15 @@ class LLM:
temperature if temperature is not None else self.temperature
)
-response = await self.client.chat.completions.create(**params)
+response: ChatCompletion = await self.client.chat.completions.create(
+**params, stream=False
+)
# Check if response is valid
if not response.choices or not response.choices[0].message:
print(response)
-raise ValueError("Invalid or empty response from LLM")
+# raise ValueError("Invalid or empty response from LLM")
+return None
# Update token counts
self.update_token_count(
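A small sketch of the new image-aware formatting path introduced above; the truncated base64 string is a placeholder standing in for a real JPEG payload.

```python
from app.llm import LLM

message = {
    "role": "user",
    "content": "Describe this screenshot",
    "base64_image": "/9j/4AAQSkZJRg...",  # truncated placeholder
}

# With image support the content becomes a text + image_url part list ...
print(LLM.format_messages([dict(message)], supports_images=True))
# ... without it the base64_image field is silently dropped.
print(LLM.format_messages([dict(message)], supports_images=False))
```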

app/mcp/__init__.py (new file, empty)
View File

app/mcp/server.py (new file, 180 lines)
View File

@@ -0,0 +1,180 @@
import logging
import sys
logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler(sys.stderr)])
import argparse
import asyncio
import atexit
import json
from inspect import Parameter, Signature
from typing import Any, Dict, Optional
from mcp.server.fastmcp import FastMCP
from app.logger import logger
from app.tool.base import BaseTool
from app.tool.bash import Bash
from app.tool.browser_use_tool import BrowserUseTool
from app.tool.str_replace_editor import StrReplaceEditor
from app.tool.terminate import Terminate
class MCPServer:
"""MCP Server implementation with tool registration and management."""
def __init__(self, name: str = "openmanus"):
self.server = FastMCP(name)
self.tools: Dict[str, BaseTool] = {}
# Initialize standard tools
self.tools["bash"] = Bash()
self.tools["browser"] = BrowserUseTool()
self.tools["editor"] = StrReplaceEditor()
self.tools["terminate"] = Terminate()
def register_tool(self, tool: BaseTool, method_name: Optional[str] = None) -> None:
"""Register a tool with parameter validation and documentation."""
tool_name = method_name or tool.name
tool_param = tool.to_param()
tool_function = tool_param["function"]
# Define the async function to be registered
async def tool_method(**kwargs):
logger.info(f"Executing {tool_name}: {kwargs}")
result = await tool.execute(**kwargs)
logger.info(f"Result of {tool_name}: {result}")
# Handle different types of results (match original logic)
if hasattr(result, "model_dump"):
return json.dumps(result.model_dump())
elif isinstance(result, dict):
return json.dumps(result)
return result
# Set method metadata
tool_method.__name__ = tool_name
tool_method.__doc__ = self._build_docstring(tool_function)
tool_method.__signature__ = self._build_signature(tool_function)
# Store parameter schema (important for tools that access it programmatically)
param_props = tool_function.get("parameters", {}).get("properties", {})
required_params = tool_function.get("parameters", {}).get("required", [])
tool_method._parameter_schema = {
param_name: {
"description": param_details.get("description", ""),
"type": param_details.get("type", "any"),
"required": param_name in required_params,
}
for param_name, param_details in param_props.items()
}
# Register with server
self.server.tool()(tool_method)
logger.info(f"Registered tool: {tool_name}")
def _build_docstring(self, tool_function: dict) -> str:
"""Build a formatted docstring from tool function metadata."""
description = tool_function.get("description", "")
param_props = tool_function.get("parameters", {}).get("properties", {})
required_params = tool_function.get("parameters", {}).get("required", [])
# Build docstring (match original format)
docstring = description
if param_props:
docstring += "\n\nParameters:\n"
for param_name, param_details in param_props.items():
required_str = (
"(required)" if param_name in required_params else "(optional)"
)
param_type = param_details.get("type", "any")
param_desc = param_details.get("description", "")
docstring += (
f" {param_name} ({param_type}) {required_str}: {param_desc}\n"
)
return docstring
def _build_signature(self, tool_function: dict) -> Signature:
"""Build a function signature from tool function metadata."""
param_props = tool_function.get("parameters", {}).get("properties", {})
required_params = tool_function.get("parameters", {}).get("required", [])
parameters = []
# Follow original type mapping
for param_name, param_details in param_props.items():
param_type = param_details.get("type", "")
default = Parameter.empty if param_name in required_params else None
# Map JSON Schema types to Python types (same as original)
annotation = Any
if param_type == "string":
annotation = str
elif param_type == "integer":
annotation = int
elif param_type == "number":
annotation = float
elif param_type == "boolean":
annotation = bool
elif param_type == "object":
annotation = dict
elif param_type == "array":
annotation = list
# Create parameter with same structure as original
param = Parameter(
name=param_name,
kind=Parameter.KEYWORD_ONLY,
default=default,
annotation=annotation,
)
parameters.append(param)
return Signature(parameters=parameters)
async def cleanup(self) -> None:
"""Clean up server resources."""
logger.info("Cleaning up resources")
# Follow original cleanup logic - only clean browser tool
if "browser" in self.tools and hasattr(self.tools["browser"], "cleanup"):
await self.tools["browser"].cleanup()
def register_all_tools(self) -> None:
"""Register all tools with the server."""
for tool in self.tools.values():
self.register_tool(tool)
def run(self, transport: str = "stdio") -> None:
"""Run the MCP server."""
# Register all tools
self.register_all_tools()
# Register cleanup function (match original behavior)
atexit.register(lambda: asyncio.run(self.cleanup()))
# Start server (with same logging as original)
logger.info(f"Starting OpenManus server ({transport} mode)")
self.server.run(transport=transport)
def parse_args() -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description="OpenManus MCP Server")
parser.add_argument(
"--transport",
choices=["stdio"],
default="stdio",
help="Communication method: stdio or http (default: stdio)",
)
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
# Create and run server (maintaining original flow)
server = MCPServer()
server.run(transport=args.transport)
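
To illustrate what register_tool derives from a tool's metadata, here is a hedged sketch with a made-up "echo" tool schema; it applies the same JSON Schema to Python type mapping used by _build_signature above (the tool name and parameters are invented for the example):

# Hypothetical example (not part of the diff): the signature _build_signature-style
# logic would derive from a minimal tool schema.
from inspect import Parameter, Signature
from typing import Any

tool_function = {
    "name": "echo",  # made-up tool
    "description": "Echo a message back to the caller.",
    "parameters": {
        "type": "object",
        "properties": {
            "message": {"type": "string", "description": "Text to echo"},
            "repeat": {"type": "integer", "description": "Repeat count"},
        },
        "required": ["message"],
    },
}

# Mirrors the mapping above: string->str, integer->int, number->float, ...
type_map = {"string": str, "integer": int, "number": float, "boolean": bool, "object": dict, "array": list}
props = tool_function["parameters"]["properties"]
required = tool_function["parameters"]["required"]
params = [
    Parameter(
        name,
        Parameter.KEYWORD_ONLY,
        default=Parameter.empty if name in required else None,
        annotation=type_map.get(details.get("type"), Any),
    )
    for name, details in props.items()
]
print(Signature(parameters=params))  # -> (*, message: str, repeat: int = None)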

app/prompt/mcp.py (new file)

@@ -0,0 +1,43 @@
"""Prompts for the MCP Agent."""
SYSTEM_PROMPT = """You are an AI assistant with access to a Model Context Protocol (MCP) server.
You can use the tools provided by the MCP server to complete tasks.
The MCP server will dynamically expose tools that you can use - always check the available tools first.
When using an MCP tool:
1. Choose the appropriate tool based on your task requirements
2. Provide properly formatted arguments as required by the tool
3. Observe the results and use them to determine next steps
4. Tools may change during operation - new tools might appear or existing ones might disappear
Follow these guidelines:
- Call tools with valid parameters as documented in their schemas
- Handle errors gracefully by understanding what went wrong and trying again with corrected parameters
- For multimedia responses (like images), you'll receive a description of the content
- Complete user requests step by step, using the most appropriate tools
- If multiple tools need to be called in sequence, make one call at a time and wait for results
Remember to clearly explain your reasoning and actions to the user.
"""
NEXT_STEP_PROMPT = """Based on the current state and available tools, what should be done next?
Think step by step about the problem and identify which MCP tool would be most helpful for the current stage.
If you've already made progress, consider what additional information you need or what actions would move you closer to completing the task.
"""
# Additional specialized prompts
TOOL_ERROR_PROMPT = """You encountered an error with the tool '{tool_name}'.
Try to understand what went wrong and correct your approach.
Common issues include:
- Missing or incorrect parameters
- Invalid parameter formats
- Using a tool that's no longer available
- Attempting an operation that's not supported
Please check the tool specifications and try again with corrected parameters.
"""
MULTIMEDIA_RESPONSE_PROMPT = """You've received a multimedia response (image, audio, etc.) from the tool '{tool_name}'.
This content has been processed and described for you.
Use this information to continue the task or provide insights to the user.
"""

app/tool/bash.py

@@ -3,7 +3,7 @@ import os
 from typing import Optional

 from app.exceptions import ToolError
-from app.tool.base import BaseTool, CLIResult, ToolResult
+from app.tool.base import BaseTool, CLIResult

 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
@@ -57,7 +57,7 @@ class _BashSession:
        if not self._started:
            raise ToolError("Session has not started.")
        if self._process.returncode is not None:
-           return ToolResult(
+           return CLIResult(
                system="tool must be restarted",
                error=f"bash has exited with returncode {self._process.returncode}",
            )
@@ -140,7 +140,7 @@ class Bash(BaseTool):
            self._session = _BashSession()
            await self._session.start()
-           return ToolResult(system="tool has been restarted.")
+           return CLIResult(system="tool has been restarted.")

        if self._session is None:
            self._session = _BashSession()

app/tool/browser_use_tool.py

@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import json

 from typing import Generic, Optional, TypeVar
@@ -418,17 +419,7 @@ class BrowserUseTool(BaseTool, Generic[Context]):
                # Create prompt for LLM
                prompt_text = """
-Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page.
-
-Examples of extraction goals:
-- Extract all company names
-- Extract specific descriptions
-- Extract all information about a topic
-- Extract links with companies in structured format
-- Extract all links
-
-If the goal is vague, summarize the page. Respond in JSON format.
+Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format.

 Extraction goal: {goal}

 Page content:
@@ -445,10 +436,54 @@ Page content:
                messages = [Message.user_message(formatted_prompt)]

-               # Use LLM to extract content based on the goal
-               response = await self.llm.ask(messages)
+               # Define extraction function for the tool
+               extraction_function = {
+                   "type": "function",
+                   "function": {
+                       "name": "extract_content",
+                       "description": "Extract specific information from a webpage based on a goal",
+                       "parameters": {
+                           "type": "object",
+                           "properties": {
+                               "extracted_content": {
+                                   "type": "object",
+                                   "description": "The content extracted from the page according to the goal",
+                               }
+                           },
+                           "required": ["extracted_content"],
+                       },
+                   },
+               }
+
+               # Use LLM to extract content with required function calling
+               response = await self.llm.ask_tool(
+                   messages,
+                   tools=[extraction_function],
+                   tool_choice="required",
+               )
+
+               # Extract content from function call response
+               if (
+                   response
+                   and response.tool_calls
+                   and len(response.tool_calls) > 0
+               ):
+                   # Get the first tool call arguments
+                   tool_call = response.tool_calls[0]
+
+                   # Parse the JSON arguments
+                   try:
+                       args = json.loads(tool_call.function.arguments)
+                       extracted_content = args.get("extracted_content", {})
+
+                       # Format extracted content as JSON string
+                       content_json = json.dumps(
+                           extracted_content, indent=2, ensure_ascii=False
+                       )
+                       msg = f"Extracted from page:\n{content_json}\n"
+                   except Exception as e:
+                       msg = f"Error parsing extraction result: {str(e)}\nRaw response: {tool_call.function.arguments}"
+               else:
+                   msg = "No content was extracted from the page."

-               msg = f"Extracted from page:\n{response}\n"
                return ToolResult(output=msg)
            except Exception as e:
                # Provide a more helpful error message
@@ -518,7 +553,16 @@ Page content:
            viewport_height = ctx.config.browser_window_size.get("height", 0)

            # Take a screenshot for the state
-           screenshot = await ctx.take_screenshot(full_page=True)
+           page = await ctx.get_current_page()
+
+           await page.bring_to_front()
+           await page.wait_for_load_state()
+
+           screenshot = await page.screenshot(
+               full_page=True, animations="disabled", type="jpeg", quality=100
+           )
+           screenshot = base64.b64encode(screenshot).decode("utf-8")

            # Build the state info with all required fields
            state_info = {
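
The new extract_content path above forces a function call (tool_choice="required") and then parses the call's JSON arguments. A minimal standalone sketch of that parsing step, with a made-up arguments payload:

# Hypothetical arguments string (illustrative only) of the shape the extract_content
# function call would carry; the parsing mirrors the diff above.
import json

raw_arguments = '{"extracted_content": {"title": "Example page", "links": ["https://example.com"]}}'
args = json.loads(raw_arguments)
extracted = args.get("extracted_content", {})
print(json.dumps(extracted, indent=2, ensure_ascii=False))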

app/tool/file_operators.py

@@ -42,17 +42,19 @@ class FileOperator(Protocol):
 class LocalFileOperator(FileOperator):
     """File operations implementation for local filesystem."""

+    encoding: str = "utf-8"
+
     async def read_file(self, path: PathLike) -> str:
         """Read content from a local file."""
         try:
-            return Path(path).read_text()
+            return Path(path).read_text(encoding=self.encoding)
         except Exception as e:
             raise ToolError(f"Failed to read {path}: {str(e)}") from None

     async def write_file(self, path: PathLike, content: str) -> None:
         """Write content to a local file."""
         try:
-            Path(path).write_text(content)
+            Path(path).write_text(content, encoding=self.encoding)
         except Exception as e:
             raise ToolError(f"Failed to write to {path}: {str(e)}") from None

app/tool/mcp.py (new file)

@@ -0,0 +1,115 @@
from contextlib import AsyncExitStack
from typing import List, Optional
from mcp import ClientSession, StdioServerParameters
from mcp.client.sse import sse_client
from mcp.client.stdio import stdio_client
from mcp.types import TextContent
from app.logger import logger
from app.tool.base import BaseTool, ToolResult
from app.tool.tool_collection import ToolCollection
class MCPClientTool(BaseTool):
"""Represents a tool proxy that can be called on the MCP server from the client side."""
session: Optional[ClientSession] = None
async def execute(self, **kwargs) -> ToolResult:
"""Execute the tool by making a remote call to the MCP server."""
if not self.session:
return ToolResult(error="Not connected to MCP server")
try:
result = await self.session.call_tool(self.name, kwargs)
content_str = ", ".join(
item.text for item in result.content if isinstance(item, TextContent)
)
return ToolResult(output=content_str or "No output returned.")
except Exception as e:
return ToolResult(error=f"Error executing tool: {str(e)}")
class MCPClients(ToolCollection):
"""
A collection of tools that connects to an MCP server and manages available tools through the Model Context Protocol.
"""
session: Optional[ClientSession] = None
exit_stack: AsyncExitStack = None
description: str = "MCP client tools for server interaction"
def __init__(self):
super().__init__() # Initialize with empty tools list
self.name = "mcp" # Keep name for backward compatibility
self.exit_stack = AsyncExitStack()
async def connect_sse(self, server_url: str) -> None:
"""Connect to an MCP server using SSE transport."""
if not server_url:
raise ValueError("Server URL is required.")
if self.session:
await self.disconnect()
streams_context = sse_client(url=server_url)
streams = await self.exit_stack.enter_async_context(streams_context)
self.session = await self.exit_stack.enter_async_context(
ClientSession(*streams)
)
await self._initialize_and_list_tools()
async def connect_stdio(self, command: str, args: List[str]) -> None:
"""Connect to an MCP server using stdio transport."""
if not command:
raise ValueError("Server command is required.")
if self.session:
await self.disconnect()
server_params = StdioServerParameters(command=command, args=args)
stdio_transport = await self.exit_stack.enter_async_context(
stdio_client(server_params)
)
read, write = stdio_transport
self.session = await self.exit_stack.enter_async_context(
ClientSession(read, write)
)
await self._initialize_and_list_tools()
async def _initialize_and_list_tools(self) -> None:
"""Initialize session and populate tool map."""
if not self.session:
raise RuntimeError("Session not initialized.")
await self.session.initialize()
response = await self.session.list_tools()
# Clear existing tools
self.tools = tuple()
self.tool_map = {}
# Create proper tool objects for each server tool
for tool in response.tools:
server_tool = MCPClientTool(
name=tool.name,
description=tool.description,
parameters=tool.inputSchema,
session=self.session,
)
self.tool_map[tool.name] = server_tool
self.tools = tuple(self.tool_map.values())
logger.info(
f"Connected to server with tools: {[tool.name for tool in response.tools]}"
)
async def disconnect(self) -> None:
"""Disconnect from the MCP server and clean up resources."""
if self.session and self.exit_stack:
await self.exit_stack.aclose()
self.session = None
self.tools = tuple()
self.tool_map = {}
logger.info("Disconnected from MCP server")

app/tool/search/__init__.py

@@ -4,6 +4,7 @@ from app.tool.search.bing_search import BingSearchEngine
 from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
 from app.tool.search.google_search import GoogleSearchEngine

 __all__ = [
     "WebSearchEngine",
     "BaiduSearchEngine",

@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
 from app.logger import logger
 from app.tool.search.base import WebSearchEngine

 ABSTRACT_MAX_LENGTH = 300

 USER_AGENTS = [

app/tool/tool_collection.py

@@ -8,6 +8,9 @@ from app.tool.base import BaseTool, ToolFailure, ToolResult
 class ToolCollection:
     """A collection of defined tools."""

+    class Config:
+        arbitrary_types_allowed = True
+
     def __init__(self, *tools: BaseTool):
         self.tools = tools
         self.tool_map = {tool.name: tool for tool in tools}

app/tool/web_search.py

@@ -4,13 +4,14 @@ from typing import List
 from tenacity import retry, stop_after_attempt, wait_exponential

 from app.config import config
+from app.logger import logger
 from app.tool.base import BaseTool
 from app.tool.search import (
     BaiduSearchEngine,
     BingSearchEngine,
     DuckDuckGoSearchEngine,
     GoogleSearchEngine,
-    WebSearchEngine
+    WebSearchEngine,
 )
@@ -44,6 +45,8 @@ class WebSearch(BaseTool):
     async def execute(self, query: str, num_results: int = 10) -> List[str]:
         """
         Execute a Web search and return a list of URLs.
+        Tries engines in order based on configuration, falling back if an engine fails with errors.
+        If all engines fail, it will wait and retry up to the configured number of times.

         Args:
             query (str): The search query to submit to the search engine.
@@ -52,37 +55,109 @@
         Returns:
             List[str]: A list of URLs matching the search query.
         """
+        # Get retry settings from config
+        retry_delay = 60  # Default to 60 seconds
+        max_retries = 3  # Default to 3 retries
+        if config.search_config:
+            retry_delay = getattr(config.search_config, "retry_delay", 60)
+            max_retries = getattr(config.search_config, "max_retries", 3)
+
+        # Try searching with retries when all engines fail
+        for retry_count in range(
+            max_retries + 1
+        ):  # +1 because first try is not a retry
+            links = await self._try_all_engines(query, num_results)
+            if links:
+                return links
+
+            if retry_count < max_retries:
+                # All engines failed, wait and retry
+                logger.warning(
+                    f"All search engines failed. Waiting {retry_delay} seconds before retry {retry_count + 1}/{max_retries}..."
+                )
+                await asyncio.sleep(retry_delay)
+            else:
+                logger.error(
+                    f"All search engines failed after {max_retries} retries. Giving up."
+                )
+
+        return []
+
+    async def _try_all_engines(self, query: str, num_results: int) -> List[str]:
+        """
+        Try all search engines in the configured order.
+
+        Args:
+            query (str): The search query to submit to the search engine.
+            num_results (int): The number of search results to return.
+
+        Returns:
+            List[str]: A list of URLs matching the search query, or empty list if all engines fail.
+        """
         engine_order = self._get_engine_order()
+        failed_engines = []
+
         for engine_name in engine_order:
             engine = self._search_engine[engine_name]
             try:
+                logger.info(f"🔎 Attempting search with {engine_name.capitalize()}...")
                 links = await self._perform_search_with_engine(
                     engine, query, num_results
                 )
                 if links:
+                    if failed_engines:
+                        logger.info(
+                            f"Search successful with {engine_name.capitalize()} after trying: {', '.join(failed_engines)}"
+                        )
                     return links
             except Exception as e:
-                print(f"Search engine '{engine_name}' failed with error: {e}")
+                failed_engines.append(engine_name.capitalize())
+                is_rate_limit = "429" in str(e) or "Too Many Requests" in str(e)
+
+                if is_rate_limit:
+                    logger.warning(
+                        f"⚠️ {engine_name.capitalize()} search engine rate limit exceeded, trying next engine..."
+                    )
+                else:
+                    logger.warning(
+                        f"⚠️ {engine_name.capitalize()} search failed with error: {e}"
+                    )
+
+        if failed_engines:
+            logger.error(f"All search engines failed: {', '.join(failed_engines)}")
         return []

     def _get_engine_order(self) -> List[str]:
         """
         Determines the order in which to try search engines.
-        Preferred engine is first (based on configuration), followed by the remaining engines.
+        Preferred engine is first (based on configuration), followed by fallback engines,
+        and then the remaining engines.

         Returns:
             List[str]: Ordered list of search engine names.
         """
         preferred = "google"
-        if config.search_config and config.search_config.engine:
-            preferred = config.search_config.engine.lower()
+        fallbacks = []
+
+        if config.search_config:
+            if config.search_config.engine:
+                preferred = config.search_config.engine.lower()
+            if config.search_config.fallback_engines:
+                fallbacks = [
+                    engine.lower() for engine in config.search_config.fallback_engines
+                ]

         engine_order = []
+        # Add preferred engine first
         if preferred in self._search_engine:
             engine_order.append(preferred)
-        for key in self._search_engine:
-            if key not in engine_order:
-                engine_order.append(key)
+
+        # Add configured fallback engines in order
+        for fallback in fallbacks:
+            if fallback in self._search_engine and fallback not in engine_order:
+                engine_order.append(fallback)

         return engine_order

     @retry(
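
Taken together, execute() now retries the whole engine list up to max_retries times with retry_delay seconds between rounds (so the worst case adds max_retries * retry_delay seconds of waiting), while _get_engine_order decides the order within each round. A standalone sketch of that ordering with assumed config values:

# Standalone sketch (assumed values, not real config): how the preferred engine and
# the configured fallbacks are merged into a single search order.
available_engines = {"google": None, "duckduckgo": None, "baidu": None, "bing": None}
preferred = "google"                   # would come from config.search_config.engine
fallbacks = ["duckduckgo", "baidu"]    # would come from config.search_config.fallback_engines

engine_order = []
if preferred in available_engines:
    engine_order.append(preferred)
for fallback in fallbacks:
    if fallback in available_engines and fallback not in engine_order:
        engine_order.append(fallback)

print(engine_order)  # ['google', 'duckduckgo', 'baidu']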

config/config.example.toml

@@ -6,6 +6,14 @@ api_key = "YOUR_API_KEY"        # Your API key
 max_tokens = 8192               # Maximum number of tokens in the response
 temperature = 0.0               # Controls randomness

+# [llm] # Amazon Bedrock
+# api_type = "aws"                                       # Required
+# model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0" # Bedrock supported modelID
+# base_url = "bedrock-runtime.us-west-2.amazonaws.com"   # Not used now
+# max_tokens = 8192
+# temperature = 1.0
+# api_key = "bear"                                       # Required but not used for Bedrock
+
 # [llm] #AZURE OPENAI:
 # api_type= 'azure'
 # model = "YOUR_MODEL_NAME" #"gpt-4o-mini"
@@ -65,6 +73,13 @@ temperature = 0.0               # Controls randomness for vision model
 # [search]
 # Search engine for agent to use. Default is "Google", can be set to "Baidu" or "DuckDuckGo".
 #engine = "Google"
+# Fallback engine order. Default is ["DuckDuckGo", "Baidu"] - will try in this order after primary engine fails.
+#fallback_engines = ["DuckDuckGo", "Baidu"]
+# Seconds to wait before retrying all engines again when they all fail due to rate limits. Default is 60.
+#retry_delay = 60
+# Maximum number of times to retry all engines when all fail. Default is 3.
+#max_retries = 3

 ## Sandbox configuration
 #[sandbox]

requirements.txt

@@ -27,3 +27,9 @@ playwright~=1.50.0
 docker~=7.1.0
 pytest~=8.3.5
 pytest-asyncio~=0.25.3
+
+mcp~=1.4.1
+httpx>=0.27.0
+tomli>=2.0.0
+
+boto3~=1.37.16

run_mcp.py (new file)

@@ -0,0 +1,116 @@
#!/usr/bin/env python
import argparse
import asyncio
import sys
from app.agent.mcp import MCPAgent
from app.config import config
from app.logger import logger
class MCPRunner:
"""Runner class for MCP Agent with proper path handling and configuration."""
def __init__(self):
self.root_path = config.root_path
self.server_reference = "app.mcp.server"
self.agent = MCPAgent()
async def initialize(
self,
connection_type: str,
server_url: str | None = None,
) -> None:
"""Initialize the MCP agent with the appropriate connection."""
logger.info(f"Initializing MCPAgent with {connection_type} connection...")
if connection_type == "stdio":
await self.agent.initialize(
connection_type="stdio",
command=sys.executable,
args=["-m", self.server_reference],
)
else: # sse
await self.agent.initialize(connection_type="sse", server_url=server_url)
logger.info(f"Connected to MCP server via {connection_type}")
async def run_interactive(self) -> None:
"""Run the agent in interactive mode."""
print("\nMCP Agent Interactive Mode (type 'exit' to quit)\n")
while True:
user_input = input("\nEnter your request: ")
if user_input.lower() in ["exit", "quit", "q"]:
break
response = await self.agent.run(user_input)
print(f"\nAgent: {response}")
async def run_single_prompt(self, prompt: str) -> None:
"""Run the agent with a single prompt."""
await self.agent.run(prompt)
async def run_default(self) -> None:
"""Run the agent in default mode."""
prompt = input("Enter your prompt: ")
if not prompt.strip():
logger.warning("Empty prompt provided.")
return
logger.warning("Processing your request...")
await self.agent.run(prompt)
logger.info("Request processing completed.")
async def cleanup(self) -> None:
"""Clean up agent resources."""
await self.agent.cleanup()
logger.info("Session ended")
def parse_args() -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description="Run the MCP Agent")
parser.add_argument(
"--connection",
"-c",
choices=["stdio", "sse"],
default="stdio",
help="Connection type: stdio or sse",
)
parser.add_argument(
"--server-url",
default="http://127.0.0.1:8000/sse",
help="URL for SSE connection",
)
parser.add_argument(
"--interactive", "-i", action="store_true", help="Run in interactive mode"
)
parser.add_argument("--prompt", "-p", help="Single prompt to execute and exit")
return parser.parse_args()
async def run_mcp() -> None:
"""Main entry point for the MCP runner."""
args = parse_args()
runner = MCPRunner()
try:
await runner.initialize(args.connection, args.server_url)
if args.prompt:
await runner.run_single_prompt(args.prompt)
elif args.interactive:
await runner.run_interactive()
else:
await runner.run_default()
except KeyboardInterrupt:
logger.info("Program interrupted by user")
except Exception as e:
logger.error(f"Error running MCPAgent: {str(e)}", exc_info=True)
sys.exit(1)
finally:
await runner.cleanup()
if __name__ == "__main__":
asyncio.run(run_mcp())
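
Besides the CLI (for example, python run_mcp.py --interactive, or python run_mcp.py -p "..." for a single prompt), MCPRunner can also be driven programmatically; a minimal sketch under that assumption, with an illustrative prompt:

# Sketch (not part of the diff): driving MCPRunner from another asyncio entry point.
import asyncio

from run_mcp import MCPRunner


async def main() -> None:
    runner = MCPRunner()
    try:
        await runner.initialize(connection_type="stdio")  # spawns app.mcp.server over stdio
        await runner.run_single_prompt("List the tools exposed by the MCP server.")
    finally:
        await runner.cleanup()


if __name__ == "__main__":
    asyncio.run(main())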

run_mcp_server.py (new file)

@@ -0,0 +1,11 @@
# coding: utf-8
# A shortcut to launch the OpenManus MCP server; importing the server through this module also avoids import-path issues.
from app.mcp.server import MCPServer, parse_args
if __name__ == "__main__":
args = parse_args()
# Create and run server (maintaining original flow)
server = MCPServer()
server.run(transport=args.transport)