update ToolCallAgent and Manus

This commit is contained in:
liangxinbing 2025-03-18 02:39:11 +08:00
parent c3203e7fa3
commit 2509bc30c4
2 changed files with 78 additions and 9 deletions

View File

@ -1,8 +1,10 @@
from typing import Any
import json
from typing import Any, Optional
from pydantic import Field
from app.agent.toolcall import ToolCallAgent
from app.logger import logger
from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.tool import Terminate, ToolCollection
from app.tool.browser_use_tool import BrowserUseTool
@ -43,3 +45,45 @@ class Manus(ToolCallAgent):
else:
await self.available_tools.get_tool(BrowserUseTool().name).cleanup()
await super()._handle_special_tool(name, result, **kwargs)
async def get_browser_state(self) -> Optional[dict]:
    """Return the current browser state as a dict, or ``None`` if unavailable.

    The browser tool is looked up in ``self.available_tools`` and asked for
    its current state; the tool result's ``output`` is decoded as JSON.
    As a side effect, a non-empty ``base64_image`` on the result is stored
    in ``self._current_base64_image``.

    This is a best-effort helper: it never raises. Failures are logged at
    debug level and reported as ``None``.

    Returns:
        The decoded state mapping, or ``None`` when the browser tool is not
        registered, the tool reports an error, the payload is not a JSON
        object, or any exception occurs while fetching/decoding the state.
    """
    browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
    if not browser_tool:
        return None

    try:
        # Get browser state directly from the tool with no context parameter
        result = await browser_tool.get_current_state()

        if result.error:
            logger.debug(f"Browser state error: {result.error}")
            return None

        # Store screenshot if available
        screenshot = getattr(result, "base64_image", None)
        if screenshot:
            self._current_base64_image = screenshot

        # Parse the state info
        state = json.loads(result.output)
    except Exception as e:
        logger.debug(f"Failed to get browser state: {e}")
        return None

    # json.loads may legally yield a list/str/number; callers rely on the
    # declared Optional[dict] contract (they call ``.get`` on the result),
    # so anything that is not a JSON object is treated as "no state".
    if not isinstance(state, dict):
        logger.debug(
            f"Browser state is not a JSON object: {type(state).__name__}"
        )
        return None

    return state
async def think(self) -> bool:
    """Run the parent ``think`` step with browser context added to the prompt.

    The current browser state is fetched via ``get_browser_state``. When a
    state is available and carries no ``"error"`` entry, its URL and title
    are appended to ``self.next_step_prompt`` for the duration of the
    parent call only.

    Returns:
        Whatever the parent implementation's ``think`` returns.

    Raises:
        Any exception raised by the parent ``think``; the original prompt is
        restored before the exception propagates.
    """
    browser_state = await self.get_browser_state()

    # Remember the unmodified prompt so the temporary suffix never persists.
    original_prompt = self.next_step_prompt
    if browser_state and not browser_state.get("error"):
        self.next_step_prompt += f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"

    try:
        # Call parent implementation
        return await super().think()
    finally:
        # Restore even when the parent raises; otherwise the browser-state
        # suffix would stay in the prompt and pile up on later steps.
        self.next_step_prompt = original_prompt

View File

@ -30,6 +30,7 @@ class ToolCallAgent(ReActAgent):
special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
tool_calls: List[ToolCall] = Field(default_factory=list)
_current_base64_image: Optional[str] = None
max_steps: int = 30
max_observe: Optional[Union[int, bool]] = None
@ -44,9 +45,11 @@ class ToolCallAgent(ReActAgent):
# Get response with tool options
response = await self.llm.ask_tool(
messages=self.messages,
system_msgs=[Message.system_message(self.system_prompt)]
if self.system_prompt
else None,
system_msgs=(
[Message.system_message(self.system_prompt)]
if self.system_prompt
else None
),
tools=self.available_tools.to_params(),
tool_choice=self.tool_choices,
)
@ -79,6 +82,9 @@ class ToolCallAgent(ReActAgent):
logger.info(
f"🧰 Tools being prepared: {[call.function.name for call in response.tool_calls]}"
)
logger.info(
f"🔧 Tool arguments: {response.tool_calls[0].function.arguments}"
)
try:
# Handle different tool_choices modes
@ -130,6 +136,9 @@ class ToolCallAgent(ReActAgent):
results = []
for command in self.tool_calls:
# Reset base64_image for each tool call
self._current_base64_image = None
result = await self.execute_tool(command)
if self.max_observe:
@ -141,7 +150,10 @@ class ToolCallAgent(ReActAgent):
# Add tool response to memory
tool_msg = Message.tool_message(
content=result, tool_call_id=command.id, name=command.function.name
content=result,
tool_call_id=command.id,
name=command.function.name,
base64_image=self._current_base64_image,
)
self.memory.add_message(tool_msg)
results.append(result)
@ -165,16 +177,29 @@ class ToolCallAgent(ReActAgent):
logger.info(f"🔧 Activating tool: '{name}'...")
result = await self.available_tools.execute(name=name, tool_input=args)
# Format result for display
# Handle special tools
await self._handle_special_tool(name=name, result=result)
# Check if result is a ToolResult with base64_image
if hasattr(result, "base64_image") and result.base64_image:
# Store the base64_image for later use in tool_message
self._current_base64_image = result.base64_image
# Format result for display
observation = (
f"Observed output of cmd `{name}` executed:\n{str(result)}"
if result
else f"Cmd `{name}` completed with no output"
)
return observation
# Format result for display (standard case)
observation = (
f"Observed output of cmd `{name}` executed:\n{str(result)}"
if result
else f"Cmd `{name}` completed with no output"
)
# Handle special tools like `finish`
await self._handle_special_tool(name=name, result=result)
return observation
except json.JSONDecodeError:
error_msg = f"Error parsing arguments for {name}: Invalid JSON format"