95 lines
3.2 KiB
Python
95 lines
3.2 KiB
Python
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
from pydantic import Field
|
|
|
|
from app.agent.toolcall import ToolCallAgent
|
|
from app.logger import logger
|
|
from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
|
|
from app.tool import Terminate, ToolCollection
|
|
from app.tool.browser_use_tool import BrowserUseTool
|
|
from app.tool.python_execute import PythonExecute
|
|
from app.tool.str_replace_editor import StrReplaceEditor
|
|
|
|
|
|
initial_working_directory = Path(os.getcwd()) / "workspace"
|
|
|
|
|
|
class Manus(ToolCallAgent):
|
|
"""
|
|
A versatile general-purpose agent that uses planning to solve various tasks.
|
|
|
|
This agent extends PlanningAgent with a comprehensive set of tools and capabilities,
|
|
including Python execution, web browsing, file operations, and information retrieval
|
|
to handle a wide range of user requests.
|
|
"""
|
|
|
|
name: str = "Manus"
|
|
description: str = (
|
|
"A versatile agent that can solve various tasks using multiple tools"
|
|
)
|
|
|
|
system_prompt: str = SYSTEM_PROMPT.format(directory=initial_working_directory)
|
|
next_step_prompt: str = NEXT_STEP_PROMPT
|
|
|
|
max_observe: int = 10000
|
|
max_steps: int = 20
|
|
|
|
# Add general-purpose tools to the tool collection
|
|
available_tools: ToolCollection = Field(
|
|
default_factory=lambda: ToolCollection(
|
|
PythonExecute(), BrowserUseTool(), StrReplaceEditor(), Terminate()
|
|
)
|
|
)
|
|
|
|
async def _handle_special_tool(self, name: str, result: Any, **kwargs):
|
|
if not self._is_special_tool(name):
|
|
return
|
|
else:
|
|
await self.available_tools.get_tool(BrowserUseTool().name).cleanup()
|
|
await super()._handle_special_tool(name, result, **kwargs)
|
|
|
|
async def get_browser_state(self) -> Optional[dict]:
|
|
"""Get the current browser state for context in next steps."""
|
|
browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
|
|
if not browser_tool:
|
|
return None
|
|
|
|
try:
|
|
# Get browser state directly from the tool with no context parameter
|
|
result = await browser_tool.get_current_state()
|
|
|
|
if result.error:
|
|
logger.debug(f"Browser state error: {result.error}")
|
|
return None
|
|
|
|
# Store screenshot if available
|
|
if hasattr(result, "base64_image") and result.base64_image:
|
|
self._current_base64_image = result.base64_image
|
|
|
|
# Parse the state info
|
|
return json.loads(result.output)
|
|
|
|
except Exception as e:
|
|
logger.debug(f"Failed to get browser state: {str(e)}")
|
|
return None
|
|
|
|
async def think(self) -> bool:
|
|
# Add your custom pre-processing here
|
|
browser_state = await self.get_browser_state()
|
|
|
|
# Modify the next_step_prompt temporarily
|
|
original_prompt = self.next_step_prompt
|
|
if browser_state and not browser_state.get("error"):
|
|
self.next_step_prompt += f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"
|
|
|
|
# Call parent implementation
|
|
result = await super().think()
|
|
|
|
# Restore original prompt
|
|
self.next_step_prompt = original_prompt
|
|
|
|
return result
|