merge: main from upstream

This commit is contained in:
Sheng Fan 2025-03-19 13:34:43 +08:00
commit 4ea7f8e988
28 changed files with 2810 additions and 258 deletions

3
.gitignore vendored
View File

@ -194,3 +194,6 @@ cython_debug/
# Built Visual Studio Code Extensions
*.vsix
# OSX
.DS_Store

View File

@ -1,4 +1,5 @@
from app.agent.base import BaseAgent
from app.agent.browser import BrowserAgent
from app.agent.planning import PlanningAgent
from app.agent.react import ReActAgent
from app.agent.swe import SWEAgent
@ -7,6 +8,7 @@ from app.agent.toolcall import ToolCallAgent
__all__ = [
"BaseAgent",
"BrowserAgent",
"PlanningAgent",
"ReActAgent",
"SWEAgent",

View File

@ -6,6 +6,7 @@ from pydantic import BaseModel, Field, model_validator
from app.llm import LLM
from app.logger import logger
from app.sandbox.client import SANDBOX_CLIENT
from app.schema import ROLE_TYPE, AgentState, Memory, Message
@ -149,7 +150,7 @@ class BaseAgent(BaseModel, ABC):
self.current_step = 0
self.state = AgentState.IDLE
results.append(f"Terminated: Reached max steps ({self.max_steps})")
await SANDBOX_CLIENT.cleanup()
return "\n".join(results) if results else "No steps executed"
@abstractmethod

129
app/agent/browser.py Normal file
View File

@ -0,0 +1,129 @@
import json
from typing import Any, Optional
from pydantic import Field
from app.agent.toolcall import ToolCallAgent
from app.logger import logger
from app.prompt.browser import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.schema import Message, ToolChoice
from app.tool import BrowserUseTool, Terminate, ToolCollection
class BrowserAgent(ToolCallAgent):
    """
    A browser agent that uses the browser_use library to control a browser.

    This agent can navigate web pages, interact with elements, fill forms,
    extract content, and perform other browser-based actions to accomplish tasks.
    """

    name: str = "browser"
    description: str = "A browser agent that can control a browser to accomplish tasks"

    system_prompt: str = SYSTEM_PROMPT
    next_step_prompt: str = NEXT_STEP_PROMPT

    max_observe: int = 10000
    max_steps: int = 20

    # Configure the available tools
    available_tools: ToolCollection = Field(
        default_factory=lambda: ToolCollection(BrowserUseTool(), Terminate())
    )

    # Use Auto for tool choice to allow both tool usage and free-form responses
    tool_choices: ToolChoice = ToolChoice.AUTO
    special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])

    # Most recent screenshot reported by the browser tool; consumed by think().
    _current_base64_image: Optional[str] = None

    async def _handle_special_tool(self, name: str, result: Any, **kwargs):
        """Clean up the browser tool, then delegate special-tool handling to the parent.

        Does nothing when ``name`` is not one of the agent's special tools.
        """
        if not self._is_special_tool(name):
            return

        # get_tool() may return nothing (get_browser_state guards the same
        # lookup), so only clean up a tool that is actually registered.
        browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
        if browser_tool:
            await browser_tool.cleanup()
        await super()._handle_special_tool(name, result, **kwargs)

    async def get_browser_state(self) -> Optional[dict]:
        """Get the current browser state for context in next steps.

        Returns:
            The parsed JSON state reported by the browser tool, or ``None`` when
            the tool is missing, reports an error, or the state cannot be
            fetched or parsed. A screenshot in the result is stored in
            ``_current_base64_image`` as a side effect.
        """
        browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
        if not browser_tool:
            return None

        try:
            # Get browser state directly from the tool
            result = await browser_tool.get_current_state()

            if result.error:
                logger.debug(f"Browser state error: {result.error}")
                return None

            # Store screenshot if available
            if hasattr(result, "base64_image") and result.base64_image:
                self._current_base64_image = result.base64_image

            # Parse the state info
            return json.loads(result.output)
        except Exception as e:
            # Best effort: a missing browser state must not abort the step.
            logger.debug(f"Failed to get browser state: {str(e)}")
            return None

    async def think(self) -> bool:
        """Process current state and decide next actions using tools, with browser state info added.

        Fills the placeholders of ``NEXT_STEP_PROMPT`` from the current browser
        state, attaches a fresh screenshot (if any) to memory, runs the parent
        ``think()``, and finally resets ``next_step_prompt`` to the unformatted
        template.

        Returns:
            Whatever the parent ``think()`` returns.
        """
        browser_state = await self.get_browser_state()

        # Placeholders stay empty when no usable browser state is available.
        url_info = ""
        tabs_info = ""
        content_above_info = ""
        content_below_info = ""
        results_info = ""

        if browser_state and not browser_state.get("error"):
            # URL and title info
            url_info = f"\n URL: {browser_state.get('url', 'N/A')}\n Title: {browser_state.get('title', 'N/A')}"

            # Tab information
            tabs = browser_state.get("tabs")
            if tabs:
                tabs_info = f"\n {len(tabs)} tab(s) available"

            # Content above/below viewport; `or 0` tolerates explicit null values.
            pixels_above = browser_state.get("pixels_above") or 0
            pixels_below = browser_state.get("pixels_below") or 0

            if pixels_above > 0:
                content_above_info = f" ({pixels_above} pixels)"
            if pixels_below > 0:
                content_below_info = f" ({pixels_below} pixels)"

            # Add screenshot as base64 if available
            if self._current_base64_image:
                image_message = Message.user_message(
                    content="Current browser screenshot:",
                    base64_image=self._current_base64_image,
                )
                self.memory.add_message(image_message)
                # Consume the screenshot so a later step without a new image
                # does not re-attach this stale one.
                self._current_base64_image = None

        # Replace placeholders with actual browser state info
        self.next_step_prompt = NEXT_STEP_PROMPT.format(
            url_placeholder=url_info,
            tabs_placeholder=tabs_info,
            content_above_placeholder=content_above_info,
            content_below_placeholder=content_below_info,
            results_placeholder=results_info,
        )

        try:
            return await super().think()
        finally:
            # Reset the template even if the parent raises, so the next step
            # never sees a prompt formatted with outdated state.
            self.next_step_prompt = NEXT_STEP_PROMPT

View File

@ -1,12 +1,8 @@
import json
import os
from pathlib import Path
from typing import Any, Optional
from pydantic import Field
from app.agent.toolcall import ToolCallAgent
from app.logger import logger
from app.agent.browser import BrowserAgent
from app.config import config
from app.prompt.browser import NEXT_STEP_PROMPT as BROWSER_NEXT_STEP_PROMPT
from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.tool import Terminate, ToolCollection
from app.tool.browser_use_tool import BrowserUseTool
@ -14,14 +10,11 @@ from app.tool.python_execute import PythonExecute
from app.tool.str_replace_editor import StrReplaceEditor
initial_working_directory = Path(os.getcwd()) / "workspace"
class Manus(ToolCallAgent):
class Manus(BrowserAgent):
"""
A versatile general-purpose agent that uses planning to solve various tasks.
This agent extends PlanningAgent with a comprehensive set of tools and capabilities,
This agent extends BrowserAgent with a comprehensive set of tools and capabilities,
including Python execution, web browsing, file operations, and information retrieval
to handle a wide range of user requests.
"""
@ -31,7 +24,7 @@ class Manus(ToolCallAgent):
"A versatile agent that can solve various tasks using multiple tools"
)
system_prompt: str = SYSTEM_PROMPT.format(directory=initial_working_directory)
system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
next_step_prompt: str = NEXT_STEP_PROMPT
max_observe: int = 10000
@ -44,48 +37,24 @@ class Manus(ToolCallAgent):
)
)
async def _handle_special_tool(self, name: str, result: Any, **kwargs):
if not self._is_special_tool(name):
return
else:
await self.available_tools.get_tool(BrowserUseTool().name).cleanup()
await super()._handle_special_tool(name, result, **kwargs)
async def get_browser_state(self) -> Optional[dict]:
"""Get the current browser state for context in next steps."""
browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
if not browser_tool:
return None
try:
# Get browser state directly from the tool with no context parameter
result = await browser_tool.get_current_state()
if result.error:
logger.debug(f"Browser state error: {result.error}")
return None
# Store screenshot if available
if hasattr(result, "base64_image") and result.base64_image:
self._current_base64_image = result.base64_image
# Parse the state info
return json.loads(result.output)
except Exception as e:
logger.debug(f"Failed to get browser state: {str(e)}")
return None
async def think(self) -> bool:
# Add your custom pre-processing here
browser_state = await self.get_browser_state()
# Modify the next_step_prompt temporarily
"""Process current state and decide next actions with appropriate context."""
# Store original prompt
original_prompt = self.next_step_prompt
if browser_state and not browser_state.get("error"):
self.next_step_prompt += f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"
# Call parent implementation
# Only check recent messages (last 3) for browser activity
recent_messages = self.memory.messages[-3:] if self.memory.messages else []
browser_in_use = any(
"browser_use" in msg.content.lower()
for msg in recent_messages
if hasattr(msg, "content") and isinstance(msg.content, str)
)
if browser_in_use:
# Override with browser-specific prompt temporarily to get browser context
self.next_step_prompt = BROWSER_NEXT_STEP_PROMPT
# Call parent's think method
result = await super().think()
# Restore original prompt

View File

@ -25,7 +25,7 @@ class LLMSettings(BaseModel):
description="Maximum input tokens to use across all requests (None for unlimited)",
)
temperature: float = Field(1.0, description="Sampling temperature")
api_type: str = Field(..., description="AzureOpenai or Openai")
api_type: str = Field(..., description="Azure, Openai, or Ollama")
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
@ -64,8 +64,25 @@ class BrowserSettings(BaseModel):
)
class SandboxSettings(BaseModel):
    """Configuration for the execution sandbox"""

    # Master switch: the sandbox is disabled unless explicitly turned on.
    use_sandbox: bool = Field(default=False, description="Whether to use the sandbox")

    # Container image and the working directory used inside it.
    image: str = Field(default="python:3.12-slim", description="Base image")
    work_dir: str = Field(default="/workspace", description="Container working directory")

    # Resource ceilings for the container.
    memory_limit: str = Field(default="512m", description="Memory limit")
    cpu_limit: float = Field(default=1.0, description="CPU limit")

    # Execution policy.
    timeout: int = Field(default=300, description="Default command timeout (seconds)")
    network_enabled: bool = Field(
        default=False,
        description="Whether network access is allowed",
    )
class AppConfig(BaseModel):
llm: Dict[str, LLMSettings]
sandbox: Optional[SandboxSettings] = Field(
None, description="Sandbox configuration"
)
browser_config: Optional[BrowserSettings] = Field(
None, description="Browser configuration"
)
@ -168,6 +185,11 @@ class Config:
search_settings = None
if search_config:
search_settings = SearchSettings(**search_config)
sandbox_config = raw_config.get("sandbox", {})
if sandbox_config:
sandbox_settings = SandboxSettings(**sandbox_config)
else:
sandbox_settings = SandboxSettings()
config_dict = {
"llm": {
@ -177,6 +199,7 @@ class Config:
for name, override_config in llm_overrides.items()
},
},
"sandbox": sandbox_settings,
"browser_config": browser_settings,
"search_config": search_settings,
}
@ -187,6 +210,10 @@ class Config:
def llm(self) -> Dict[str, LLMSettings]:
return self._config.llm
@property
def sandbox(self) -> SandboxSettings:
    """Return the ``sandbox`` entry of the loaded application config."""
    app_config = self._config
    return app_config.sandbox
@property
def browser_config(self) -> Optional[BrowserSettings]:
    """Return the ``browser_config`` entry of the loaded application config."""
    app_config = self._config
    return app_config.browser_config
@ -195,5 +222,10 @@ class Config:
def search_config(self) -> Optional[SearchSettings]:
return self._config.search_config
@property
def workspace_root(self) -> Path:
    """Return the module-level ``WORKSPACE_ROOT`` path."""
    root = WORKSPACE_ROOT
    return root
config = Config()

View File

@ -30,6 +30,14 @@ from app.schema import (
REASONING_MODELS = ["o1", "o3-mini"]
# Model names treated as image-capable. The LLM methods in this module test
# `self.model in MULTIMODAL_MODELS` to decide whether base64 images are kept
# when messages are formatted; matching is by exact model name.
MULTIMODAL_MODELS = [
"gpt-4-vision-preview",
"gpt-4o",
"gpt-4o-mini",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
"claude-3-haiku-20240307",
]
class TokenCounter:
@ -196,6 +204,7 @@ class LLM:
# Add token counting related attributes
self.total_input_tokens = 0
self.total_completion_tokens = 0
self.max_input_tokens = (
llm_config.max_input_tokens
if hasattr(llm_config, "max_input_tokens")
@ -229,12 +238,15 @@ class LLM:
def count_message_tokens(self, messages: List[dict]) -> int:
return self.token_counter.count_message_tokens(messages)
def update_token_count(self, input_tokens: int) -> None:
def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
"""Update token counts"""
# Only track tokens if max_input_tokens is set
self.total_input_tokens += input_tokens
self.total_completion_tokens += completion_tokens
logger.info(
f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}"
f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
f"Total={input_tokens + completion_tokens}, Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
)
def check_token_limit(self, input_tokens: int) -> bool:
@ -255,12 +267,15 @@ class LLM:
return "Token limit exceeded"
@staticmethod
def format_messages(messages: List[Union[dict, Message]]) -> List[dict]:
def format_messages(
messages: List[Union[dict, Message]], supports_images: bool = False
) -> List[dict]:
"""
Format messages for LLM by converting them to OpenAI message format.
Args:
messages: List of messages that can be either dict or Message objects
supports_images: Flag indicating if the target model supports image inputs
Returns:
List[dict]: List of formatted messages in OpenAI format
@ -284,20 +299,20 @@ class LLM:
if isinstance(message, Message):
message = message.to_dict()
if not isinstance(message, dict):
raise TypeError(f"Unsupported message type: {type(message)}")
# Validate required fields
if isinstance(message, dict):
# If message is a dict, ensure it has required fields
if "role" not in message:
raise ValueError("Message dict must contain 'role' field")
# Process base64 images if present
if message.get("base64_image"):
# Process base64 images if present and model supports images
if supports_images and message.get("base64_image"):
# Initialize or convert content to appropriate format
if not message.get("content"):
message["content"] = []
elif isinstance(message["content"], str):
message["content"] = [{"type": "text", "text": message["content"]}]
message["content"] = [
{"type": "text", "text": message["content"]}
]
elif isinstance(message["content"], list):
# Convert string items to proper text objects
message["content"] = [
@ -321,17 +336,21 @@ class LLM:
# Remove the base64_image field
del message["base64_image"]
# If model doesn't support images but message has base64_image, handle gracefully
elif not supports_images and message.get("base64_image"):
# Just remove the base64_image field and keep the text content
del message["base64_image"]
# Only include messages with content or tool_calls
if "content" in message or "tool_calls" in message:
formatted_messages.append(message)
# else: do not include the message
else:
raise TypeError(f"Unsupported message type: {type(message)}")
# Validate all roles
invalid_roles = [
msg for msg in formatted_messages if msg["role"] not in ROLE_VALUES
]
if invalid_roles:
raise ValueError(f"Invalid role: {invalid_roles[0]['role']}")
# Validate all messages have required fields
for msg in formatted_messages:
if msg["role"] not in ROLE_VALUES:
raise ValueError(f"Invalid role: {msg['role']}")
return formatted_messages
@ -368,12 +387,15 @@ class LLM:
Exception: For unexpected errors
"""
try:
# Format system and user messages
# Check if the model supports images
supports_images = self.model in MULTIMODAL_MODELS
# Format system and user messages with image support check
if system_msgs:
system_msgs = self.format_messages(system_msgs)
messages = system_msgs + self.format_messages(messages)
system_msgs = self.format_messages(system_msgs, supports_images)
messages = system_msgs + self.format_messages(messages, supports_images)
else:
messages = self.format_messages(messages)
messages = self.format_messages(messages, supports_images)
# Calculate input token count
input_tokens = self.count_message_tokens(messages)
@ -407,8 +429,9 @@ class LLM:
raise ValueError("Empty or invalid response from LLM")
# Update token counts
if response.usage:
self.update_token_count(response.usage.prompt_tokens)
self.update_token_count(
response.usage.prompt_tokens, response.usage.completion_tokens
)
return response.choices[0].message.content
@ -418,9 +441,11 @@ class LLM:
response = await self.client.chat.completions.create(**params, stream=True)
collected_messages = []
completion_text = ""
async for chunk in response:
chunk_message = chunk.choices[0].delta.content or ""
collected_messages.append(chunk_message)
completion_text += chunk_message
print(chunk_message, end="", flush=True)
print() # Newline after streaming
@ -428,7 +453,12 @@ class LLM:
if not full_response:
raise ValueError("Empty response from streaming LLM")
# TODO Update token counts
# estimate completion tokens for streaming response
completion_tokens = self.count_tokens(completion_text)
logger.info(
f"Estimated completion tokens for streaming response: {completion_tokens}"
)
self.total_completion_tokens += completion_tokens
return full_response
@ -486,8 +516,15 @@ class LLM:
Exception: For unexpected errors
"""
try:
# Format messages
formatted_messages = self.format_messages(messages)
# For ask_with_images, we always set supports_images to True because
# this method should only be called with models that support images
if self.model not in MULTIMODAL_MODELS:
raise ValueError(
f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}"
)
# Format messages with image support
formatted_messages = self.format_messages(messages, supports_images=True)
# Ensure the last message is from the user to attach images
if not formatted_messages or formatted_messages[-1]["role"] != "user":
@ -526,7 +563,10 @@ class LLM:
# Add system messages if provided
if system_msgs:
all_messages = self.format_messages(system_msgs) + formatted_messages
all_messages = (
self.format_messages(system_msgs, supports_images=True)
+ formatted_messages
)
else:
all_messages = formatted_messages
@ -640,12 +680,15 @@ class LLM:
if tool_choice not in TOOL_CHOICE_VALUES:
raise ValueError(f"Invalid tool_choice: {tool_choice}")
# Check if the model supports images
supports_images = self.model in MULTIMODAL_MODELS
# Format messages
if system_msgs:
system_msgs = self.format_messages(system_msgs)
messages = system_msgs + self.format_messages(messages)
system_msgs = self.format_messages(system_msgs, supports_images)
messages = system_msgs + self.format_messages(messages, supports_images)
else:
messages = self.format_messages(messages)
messages = self.format_messages(messages, supports_images)
# Calculate input token count
input_tokens = self.count_message_tokens(messages)
@ -696,7 +739,9 @@ class LLM:
raise ValueError("Invalid or empty response from LLM")
# Update token counts
self.update_token_count(response.usage.prompt_tokens)
self.update_token_count(
response.usage.prompt_tokens, response.usage.completion_tokens
)
return response.choices[0].message

92
app/prompt/browser.py Normal file
View File

@ -0,0 +1,92 @@
# System prompt for the browser-automation agent. It describes the input
# layout, the JSON response format, and the rules for actions, navigation,
# task completion, and extraction.
# Braces are doubled ({{ }}), which is the str.format escape for literal braces.
# NOTE(review): BrowserAgent assigns this constant to `system_prompt` without
# calling .format(), so the doubled braces (including `{{max_actions}}`) would
# reach the model as written — confirm this is intended.
SYSTEM_PROMPT = """\
You are an AI agent designed to automate browser tasks. Your goal is to accomplish the ultimate task following the rules.
# Input Format
Task
Previous steps
Current URL
Open Tabs
Interactive Elements
[index]<type>text</type>
- index: Numeric identifier for interaction
- type: HTML element type (button, input, etc.)
- text: Element description
Example:
[33]<button>Submit Form</button>
- Only elements with numeric indexes in [] are interactive
- elements without [] provide only context
# Response Rules
1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
{{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not",
"memory": "Description of what has been done and what you need to remember. Be very specific. Count here ALWAYS how many times you have done something and how many remain. E.g. 0 out of 10 websites analyzed. Continue with abc and xyz",
"next_goal": "What needs to be done with the next immediate action"}},
"action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}}
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
Common action sequences:
- Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
- Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
- Actions are executed in the given order
- If the page changes after an action, the sequence is interrupted and you get the new state.
- Only provide the action sequence until an action which changes the page state significantly.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
- only use multiple actions if it makes sense.
3. ELEMENT INTERACTION:
- Only use indexes of the interactive elements
- Elements marked with "[]Non-interactive text" are non-interactive
4. NAVIGATION & ERROR HANDLING:
- If no suitable elements exist, use other functions to complete the task
- If stuck, try alternative approaches - like going back to a previous page, new search, new tab etc.
- Handle popups/cookies by accepting or closing them
- Use scroll to find elements you are looking for
- If you want to research something, open a new tab instead of using the current tab
- If captcha pops up, try to solve it - else try a different approach
- If the page is not fully loaded, use wait action
5. TASK COMPLETION:
- Use the done action as the last action as soon as the ultimate task is complete
- Dont use "done" before you are done with everything the user asked you, except you reach the last step of max_steps.
- If you reach your last step, use the done action even if the task is not fully finished. Provide all the information you have gathered so far. If the ultimate task is completly finished set success to true. If not everything the user asked for is completed set success in done to false!
- If you have to do something repeatedly for example the task says for "each", or "for all", or "x times", count always inside "memory" how many times you have done it and how many remain. Don't stop until you have completed like the task asked you. Only call done after the last step.
- Don't hallucinate actions
- Make sure you include everything you found out for the ultimate task in the done text parameter. Do not just say you are done, but include the requested information of the task.
6. VISUAL CONTEXT:
- When an image is provided, use it to understand the page layout
- Bounding boxes with labels on their top right corner correspond to element indexes
7. Form filling:
- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.
8. Long tasks:
- Keep track of the status and subresults in the memory.
9. Extraction:
- If your task is to find information - call extract_content on the specific pages to get and store the information.
Your responses must be always JSON with the specified format.
"""
# Per-step prompt template for the browser agent. It is a str.format template
# with five fields: url_placeholder, tabs_placeholder,
# content_above_placeholder, content_below_placeholder and results_placeholder.
# BrowserAgent.think() fills all five before each step; a field left as an
# empty string simply drops that detail from the sentence it sits in.
NEXT_STEP_PROMPT = """
What should I do next to achieve my goal?
When you see [Current state starts here], focus on the following:
- Current URL and page title{url_placeholder}
- Available tabs{tabs_placeholder}
- Interactive elements and their indices
- Content above{content_above_placeholder} or below{content_below_placeholder} the viewport (if indicated)
- Any action results or errors{results_placeholder}
For browser interactions:
- To navigate: browser_use with action="go_to_url", url="..."
- To click: browser_use with action="click_element", index=N
- To type: browser_use with action="input_text", index=N, text="..."
- To extract: browser_use with action="extract_content", goal="..."
- To scroll: browser_use with action="scroll_down" or "scroll_up"
Consider both what's visible and what might be beyond the current viewport.
Be methodical - remember your progress and what you've learned so far.
"""

30
app/sandbox/__init__.py Normal file
View File

@ -0,0 +1,30 @@
"""
Docker Sandbox Module
Provides secure containerized execution environment with resource limits
and isolation for running untrusted code.
"""
from app.sandbox.client import (
BaseSandboxClient,
LocalSandboxClient,
create_sandbox_client,
)
from app.sandbox.core.exceptions import (
SandboxError,
SandboxResourceError,
SandboxTimeoutError,
)
from app.sandbox.core.manager import SandboxManager
from app.sandbox.core.sandbox import DockerSandbox
# Public names of the sandbox package: the sandbox and its manager, the client
# interface/implementation/factory, and the sandbox exception types. Every
# entry is imported above from its defining submodule.
__all__ = [
"DockerSandbox",
"SandboxManager",
"BaseSandboxClient",
"LocalSandboxClient",
"create_sandbox_client",
"SandboxError",
"SandboxTimeoutError",
"SandboxResourceError",
]

201
app/sandbox/client.py Normal file
View File

@ -0,0 +1,201 @@
from abc import ABC, abstractmethod
from typing import Dict, Optional, Protocol
from app.config import SandboxSettings
from app.sandbox.core.sandbox import DockerSandbox
class SandboxFileOperations(Protocol):
    """Structural interface for objects that move and access sandbox files."""

    async def copy_from(self, container_path: str, local_path: str) -> None:
        """Copy a file out of the container.

        Args:
            container_path: Source path inside the container.
            local_path: Destination path on the local side.
        """

    async def copy_to(self, local_path: str, container_path: str) -> None:
        """Copy a local file into the container.

        Args:
            local_path: Source path on the local side.
            container_path: Destination path inside the container.
        """

    async def read_file(self, path: str) -> str:
        """Return the content of a file inside the container.

        Args:
            path: Path of the file inside the container.

        Returns:
            str: The file content.
        """

    async def write_file(self, path: str, content: str) -> None:
        """Write ``content`` to a file inside the container.

        Args:
            path: Path of the file inside the container.
            content: Text to write.
        """
class BaseSandboxClient(ABC):
    """Abstract interface every sandbox client has to implement."""

    # --- lifecycle ---------------------------------------------------------

    @abstractmethod
    async def create(
        self,
        config: Optional[SandboxSettings] = None,
        volume_bindings: Optional[Dict[str, str]] = None,
    ) -> None:
        """Create the sandbox from optional settings and volume bindings."""

    @abstractmethod
    async def cleanup(self) -> None:
        """Release the resources held by the client."""

    # --- command execution ---------------------------------------------------

    @abstractmethod
    async def run_command(self, command: str, timeout: Optional[int] = None) -> str:
        """Execute ``command`` and return its output."""

    # --- file transfer and access --------------------------------------------

    @abstractmethod
    async def copy_from(self, container_path: str, local_path: str) -> None:
        """Copy a file from the container to ``local_path``."""

    @abstractmethod
    async def copy_to(self, local_path: str, container_path: str) -> None:
        """Copy a local file to ``container_path`` in the container."""

    @abstractmethod
    async def read_file(self, path: str) -> str:
        """Return the content of the file at ``path``."""

    @abstractmethod
    async def write_file(self, path: str, content: str) -> None:
        """Write ``content`` to the file at ``path``."""
class LocalSandboxClient(BaseSandboxClient):
    """Local sandbox client implementation.

    Wraps at most one ``DockerSandbox`` at a time and forwards every operation
    to it. All operations except ``create`` and ``cleanup`` raise
    ``RuntimeError`` until ``create`` has been called.
    """

    def __init__(self):
        """Initializes local sandbox client with no sandbox attached."""
        self.sandbox: Optional[DockerSandbox] = None

    def _require_sandbox(self) -> DockerSandbox:
        """Return the active sandbox or raise if none has been created."""
        if not self.sandbox:
            raise RuntimeError("Sandbox not initialized")
        return self.sandbox

    async def create(
        self,
        config: Optional[SandboxSettings] = None,
        volume_bindings: Optional[Dict[str, str]] = None,
    ) -> None:
        """Creates a sandbox.

        A sandbox left over from an earlier ``create`` call is cleaned up
        first, so its resources are not orphaned when the reference is
        replaced.

        Args:
            config: Sandbox configuration.
            volume_bindings: Volume mappings.

        Raises:
            RuntimeError: If sandbox creation fails.
        """
        await self.cleanup()
        # Assign before create() so a partially created sandbox stays
        # reachable for a later cleanup().
        self.sandbox = DockerSandbox(config, volume_bindings)
        await self.sandbox.create()

    async def run_command(self, command: str, timeout: Optional[int] = None) -> str:
        """Runs command in sandbox.

        Args:
            command: Command to execute.
            timeout: Execution timeout in seconds.

        Returns:
            Command output.

        Raises:
            RuntimeError: If sandbox not initialized.
        """
        return await self._require_sandbox().run_command(command, timeout)

    async def copy_from(self, container_path: str, local_path: str) -> None:
        """Copies file from container to local.

        Args:
            container_path: File path in container.
            local_path: Local destination path.

        Raises:
            RuntimeError: If sandbox not initialized.
        """
        await self._require_sandbox().copy_from(container_path, local_path)

    async def copy_to(self, local_path: str, container_path: str) -> None:
        """Copies file from local to container.

        Args:
            local_path: Local source file path.
            container_path: Destination path in container.

        Raises:
            RuntimeError: If sandbox not initialized.
        """
        await self._require_sandbox().copy_to(local_path, container_path)

    async def read_file(self, path: str) -> str:
        """Reads file from container.

        Args:
            path: File path in container.

        Returns:
            File content.

        Raises:
            RuntimeError: If sandbox not initialized.
        """
        return await self._require_sandbox().read_file(path)

    async def write_file(self, path: str, content: str) -> None:
        """Writes file to container.

        Args:
            path: File path in container.
            content: File content.

        Raises:
            RuntimeError: If sandbox not initialized.
        """
        await self._require_sandbox().write_file(path, content)

    async def cleanup(self) -> None:
        """Cleans up resources; a no-op when no sandbox is attached."""
        if self.sandbox:
            await self.sandbox.cleanup()
            self.sandbox = None
def create_sandbox_client() -> LocalSandboxClient:
    """Build a new sandbox client.

    Returns:
        LocalSandboxClient: A freshly constructed client with no sandbox yet.
    """
    client = LocalSandboxClient()
    return client
SANDBOX_CLIENT = create_sandbox_client()

View File

@ -0,0 +1,17 @@
"""Exception classes for the sandbox system.
This module defines custom exceptions used throughout the sandbox system to
handle various error conditions in a structured way.
"""
class SandboxError(Exception):
    """Common base class for the sandbox exception types."""
class SandboxTimeoutError(SandboxError):
    """Raised to signal that a sandbox operation timed out."""
class SandboxResourceError(SandboxError):
    """Raised to signal a resource-related sandbox failure."""

313
app/sandbox/core/manager.py Normal file
View File

@ -0,0 +1,313 @@
import asyncio
import uuid
from contextlib import asynccontextmanager
from typing import Dict, Optional, Set
import docker
from docker.errors import APIError, ImageNotFound
from app.config import SandboxSettings
from app.logger import logger
from app.sandbox.core.sandbox import DockerSandbox
class SandboxManager:
"""Docker sandbox manager.
Manages multiple DockerSandbox instances lifecycle including creation,
monitoring, and cleanup. Provides concurrent access control and automatic
cleanup mechanisms for sandbox resources.
Attributes:
max_sandboxes: Maximum allowed number of sandboxes.
idle_timeout: Sandbox idle timeout in seconds.
cleanup_interval: Cleanup check interval in seconds.
_sandboxes: Active sandbox instance mapping.
_last_used: Last used time record for sandboxes.
"""
def __init__(
    self,
    max_sandboxes: int = 100,
    idle_timeout: int = 3600,
    cleanup_interval: int = 300,
):
    """Set up an empty manager and start its automatic cleanup task.

    Args:
        max_sandboxes: Upper bound on the number of sandboxes.
        idle_timeout: Idle timeout in seconds.
        cleanup_interval: Seconds between cleanup checks.
    """
    # Limits and timing supplied by the caller.
    self.max_sandboxes, self.idle_timeout, self.cleanup_interval = (
        max_sandboxes,
        idle_timeout,
        cleanup_interval,
    )

    # Docker client built from the environment.
    self._client = docker.from_env()

    # Per-sandbox bookkeeping, all keyed by sandbox ID.
    self._sandboxes: Dict[str, DockerSandbox] = {}
    self._last_used: Dict[str, float] = {}
    self._locks: Dict[str, asyncio.Lock] = {}

    # Manager-wide concurrency state.
    self._global_lock = asyncio.Lock()
    self._active_operations: Set[str] = set()

    # Cleanup task state; the task itself is created by the call below.
    self._is_shutting_down = False
    self._cleanup_task: Optional[asyncio.Task] = None

    self.start_cleanup_task()
async def ensure_image(self, image: str) -> bool:
    """Ensures Docker image is available, pulling it when it is missing.

    Both Docker calls are blocking, so they run in the default executor
    instead of stalling the event loop.

    Args:
        image: Image name.

    Returns:
        bool: True if the image is present or was pulled; False if the pull
        failed. Errors other than ``ImageNotFound`` raised by the lookup
        propagate to the caller.
    """
    loop = asyncio.get_running_loop()

    try:
        await loop.run_in_executor(None, self._client.images.get, image)
        return True
    except ImageNotFound:
        pass  # Not available locally; fall through to pulling it.

    try:
        logger.info(f"Pulling image {image}...")
        await loop.run_in_executor(None, self._client.images.pull, image)
        return True
    except Exception as e:
        # Any pull failure is reported as "image unavailable".
        logger.error(f"Failed to pull image {image}: {e}")
        return False
@asynccontextmanager
async def sandbox_operation(self, sandbox_id: str):
    """Context manager for sandbox operations.

    Holds the sandbox's lock for the duration of the ``async with`` body,
    marks the sandbox as having an active operation, and refreshes its
    last-used time.

    Args:
        sandbox_id: Sandbox ID.

    Yields:
        The sandbox registered under ``sandbox_id``.

    Raises:
        KeyError: If sandbox not found.
    """
    # Reject unknown IDs before touching _locks, so lookups of bad IDs do not
    # leave lock entries behind.
    if sandbox_id not in self._sandboxes:
        raise KeyError(f"Sandbox {sandbox_id} not found")

    lock = self._locks.get(sandbox_id)
    if lock is None:
        lock = self._locks[sandbox_id] = asyncio.Lock()

    async with lock:
        # Re-check: the sandbox may have been removed while waiting for the lock.
        if sandbox_id not in self._sandboxes:
            raise KeyError(f"Sandbox {sandbox_id} not found")

        self._active_operations.add(sandbox_id)
        try:
            self._last_used[sandbox_id] = asyncio.get_running_loop().time()
            yield self._sandboxes[sandbox_id]
        finally:
            self._active_operations.discard(sandbox_id)
async def create_sandbox(
    self,
    config: Optional[SandboxSettings] = None,
    volume_bindings: Optional[Dict[str, str]] = None,
) -> str:
    """Creates a new sandbox instance and registers it with the manager.

    Args:
        config: Sandbox configuration; a default SandboxSettings() is used
            when omitted.
        volume_bindings: Volume mapping configuration.

    Returns:
        str: Sandbox ID.

    Raises:
        RuntimeError: If max sandbox count reached, the image cannot be
            ensured, or creation fails.
    """
    async with self._global_lock:
        if len(self._sandboxes) >= self.max_sandboxes:
            raise RuntimeError(
                f"Maximum number of sandboxes ({self.max_sandboxes}) reached"
            )

        config = config or SandboxSettings()
        if not await self.ensure_image(config.image):
            raise RuntimeError(f"Failed to ensure Docker image: {config.image}")

        sandbox_id = str(uuid.uuid4())
        sandbox = None
        try:
            sandbox = DockerSandbox(config, volume_bindings)
            await sandbox.create()

            self._sandboxes[sandbox_id] = sandbox
            self._last_used[sandbox_id] = asyncio.get_running_loop().time()
            self._locks[sandbox_id] = asyncio.Lock()

            logger.info(f"Created sandbox {sandbox_id}")
            return sandbox_id
        except Exception as e:
            logger.error(f"Failed to create sandbox: {e}")
            # Roll back inline. delete_sandbox() would try to re-acquire
            # _global_lock, which this coroutine already holds and which is
            # not reentrant, so calling it here would deadlock.
            if self._sandboxes.pop(sandbox_id, None) is not None:
                self._last_used.pop(sandbox_id, None)
                self._locks.pop(sandbox_id, None)
                try:
                    await sandbox.cleanup()
                except Exception as cleanup_error:
                    logger.error(
                        f"Error during cleanup of sandbox {sandbox_id}: {cleanup_error}"
                    )
            raise RuntimeError(f"Failed to create sandbox: {e}") from e
async def get_sandbox(self, sandbox_id: str) -> DockerSandbox:
    """Looks up a sandbox through ``sandbox_operation``.

    Args:
        sandbox_id: Sandbox ID.

    Returns:
        DockerSandbox: The instance yielded by ``sandbox_operation``.

    Raises:
        KeyError: If sandbox does not exist.
    """
    operation = self.sandbox_operation(sandbox_id)
    async with operation as instance:
        found = instance
    return found
def start_cleanup_task(self) -> None:
    """Schedules the periodic idle-sandbox sweep as an asyncio task."""

    async def _periodic_sweep():
        # Sweep, sleep, repeat until the manager is flagged as shutting down.
        while True:
            if self._is_shutting_down:
                return
            try:
                await self._cleanup_idle_sandboxes()
            except Exception as e:
                logger.error(f"Error in cleanup loop: {e}")
            await asyncio.sleep(self.cleanup_interval)

    sweeper = asyncio.create_task(_periodic_sweep())
    self._cleanup_task = sweeper
async def _cleanup_idle_sandboxes(self) -> None:
    """Deletes every sandbox that is idle past ``idle_timeout``.

    Sandboxes with an operation in flight are never selected. Candidates
    are collected under the global lock; deletion happens after it is
    released, and a failure on one sandbox is logged without stopping the
    rest.
    """
    now = asyncio.get_event_loop().time()

    async with self._global_lock:
        expired = [
            sid
            for sid, stamp in self._last_used.items()
            if sid not in self._active_operations
            and now - stamp > self.idle_timeout
        ]

    for sid in expired:
        try:
            await self.delete_sandbox(sid)
        except Exception as e:
            logger.error(f"Error cleaning up sandbox {sid}: {e}")
async def cleanup(self) -> None:
    """Cleans up all resources.

    Stops the background cleanup task, deletes every registered sandbox
    concurrently (waiting at most 30 seconds) and then drops all
    bookkeeping state.
    """
    logger.info("Starting manager cleanup...")
    self._is_shutting_down = True

    # Cancel cleanup task
    if self._cleanup_task:
        self._cleanup_task.cancel()
        try:
            await asyncio.wait_for(self._cleanup_task, timeout=1.0)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    # Snapshot the IDs under the lock; deletions re-acquire it themselves.
    async with self._global_lock:
        sandbox_ids = list(self._sandboxes.keys())

    # Concurrently clean up all sandboxes
    cleanup_tasks = [
        asyncio.create_task(self._safe_delete_sandbox(sandbox_id))
        for sandbox_id in sandbox_ids
    ]

    if cleanup_tasks:
        # asyncio.wait() does not raise on timeout; unfinished tasks are
        # returned in the second set, so that is what signals a timeout.
        _, pending = await asyncio.wait(cleanup_tasks, timeout=30.0)
        if pending:
            logger.error("Sandbox cleanup timed out")

    # Clean up remaining references
    self._sandboxes.clear()
    self._last_used.clear()
    self._locks.clear()
    self._active_operations.clear()

    logger.info("Manager cleanup completed")
async def _safe_delete_sandbox(self, sandbox_id: str) -> None:
    """Deletes one sandbox without letting any error escape.

    If the sandbox has an operation in flight, polls up to 10 times at
    0.5 second intervals for it to finish, then proceeds regardless.

    Args:
        sandbox_id: Sandbox ID to delete.
    """
    try:
        if sandbox_id in self._active_operations:
            logger.warning(
                f"Sandbox {sandbox_id} has active operations, waiting for completion"
            )
            polls_left = 10
            while polls_left > 0:
                await asyncio.sleep(0.5)
                if sandbox_id not in self._active_operations:
                    break
                polls_left -= 1
            else:
                # All polls used up and the operation is still running.
                logger.warning(
                    f"Timeout waiting for sandbox {sandbox_id} operations to complete"
                )

        # Tear down the container-side resources first
        instance = self._sandboxes.get(sandbox_id)
        if instance:
            await instance.cleanup()

        # Then forget the sandbox in every manager table
        async with self._global_lock:
            for table in (self._sandboxes, self._last_used, self._locks):
                table.pop(sandbox_id, None)
            logger.info(f"Deleted sandbox {sandbox_id}")
    except Exception as e:
        logger.error(f"Error during cleanup of sandbox {sandbox_id}: {e}")
async def delete_sandbox(self, sandbox_id: str) -> None:
    """Deletes the given sandbox; unknown IDs are silently ignored.

    Args:
        sandbox_id: Sandbox ID.
    """
    if sandbox_id in self._sandboxes:
        try:
            await self._safe_delete_sandbox(sandbox_id)
        except Exception as e:
            logger.error(f"Failed to delete sandbox {sandbox_id}: {e}")
async def __aenter__(self) -> "SandboxManager":
    """Enters the async context and hands back the manager itself."""
    manager = self
    return manager
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
    """Leaves the async context by running the full manager cleanup."""
    shutdown = self.cleanup()
    await shutdown
def get_stats(self) -> Dict:
    """Reports a snapshot of the manager's counters and settings.

    Returns:
        Dict: Sandbox and active-operation counts, the configured limits
        and the shutdown flag.
    """
    stats = {
        "total_sandboxes": len(self._sandboxes),
        "active_operations": len(self._active_operations),
    }
    # Plain settings are copied over under their attribute names.
    for setting in ("max_sandboxes", "idle_timeout", "cleanup_interval"):
        stats[setting] = getattr(self, setting)
    stats["is_shutting_down"] = self._is_shutting_down
    return stats

462
app/sandbox/core/sandbox.py Normal file
View File

@ -0,0 +1,462 @@
import asyncio
import io
import os
import tarfile
import tempfile
import uuid
from typing import Dict, Optional
import docker
from docker.errors import NotFound
from docker.models.containers import Container
from app.config import SandboxSettings
from app.sandbox.core.exceptions import SandboxTimeoutError
from app.sandbox.core.terminal import AsyncDockerizedTerminal
class DockerSandbox:
"""Docker sandbox environment.
Provides a containerized execution environment with resource limits,
file operations, and command execution capabilities.
Attributes:
config: Sandbox configuration.
volume_bindings: Volume mapping configuration.
client: Docker client.
container: Docker container instance.
terminal: Container terminal interface.
"""
def __init__(
    self,
    config: Optional[SandboxSettings] = None,
    volume_bindings: Optional[Dict[str, str]] = None,
):
    """Prepares a sandbox object; no container exists until create().

    Args:
        config: Sandbox configuration. A default SandboxSettings() is used
            when omitted.
        volume_bindings: Volume mappings in {host_path: container_path}
            format.
    """
    self.config = config if config else SandboxSettings()
    self.volume_bindings = volume_bindings if volume_bindings else {}
    self.client = docker.from_env()
    # Both are populated by create() and reset by cleanup().
    self.terminal: Optional[AsyncDockerizedTerminal] = None
    self.container: Optional[Container] = None
async def create(self) -> "DockerSandbox":
    """Creates and starts the container, then opens its terminal.

    Returns:
        Current sandbox instance.

    Raises:
        RuntimeError: If any step fails; cleanup() is run first so no
            half-built container is left behind.
    """
    try:
        # Resource limits, networking and mounts
        cpu_period = 100000
        network_mode = "bridge" if self.config.network_enabled else "none"
        host_config = self.client.api.create_host_config(
            mem_limit=self.config.memory_limit,
            cpu_period=cpu_period,
            cpu_quota=int(cpu_period * self.config.cpu_limit),
            network_mode=network_mode,
            binds=self._prepare_volume_bindings(),
        )

        # Unique container name with sandbox_ prefix
        container_name = f"sandbox_{uuid.uuid4().hex[:8]}"

        # "tail -f /dev/null" keeps the container alive until it is stopped.
        created = await asyncio.to_thread(
            self.client.api.create_container,
            image=self.config.image,
            command="tail -f /dev/null",
            hostname="sandbox",
            working_dir=self.config.work_dir,
            host_config=host_config,
            name=container_name,
            tty=True,
            detach=True,
        )
        container_id = created["Id"]

        self.container = self.client.containers.get(container_id)
        await asyncio.to_thread(self.container.start)

        # PYTHONUNBUFFERED ensures Python output is not buffered
        self.terminal = AsyncDockerizedTerminal(
            container_id,
            self.config.work_dir,
            env_vars={"PYTHONUNBUFFERED": "1"},
        )
        await self.terminal.init()
    except Exception as e:
        await self.cleanup()  # Ensure resources are cleaned up
        raise RuntimeError(f"Failed to create sandbox: {e}") from e

    return self
def _prepare_volume_bindings(self) -> Dict[str, Dict[str, str]]:
    """Builds the bind-mount table for the container.

    Returns:
        Mapping of host path to ``{"bind": container_path, "mode": "rw"}``,
        containing the working-directory mount followed by the custom
        volume bindings.
    """
    # Host directory backing the container's working directory
    host_work_dir = self._ensure_host_dir(self.config.work_dir)
    mounts = {host_work_dir: {"bind": self.config.work_dir, "mode": "rw"}}

    # Custom bindings come second, so they win on a host-path clash.
    mounts.update(
        {
            host: {"bind": target, "mode": "rw"}
            for host, target in self.volume_bindings.items()
        }
    )
    return mounts
@staticmethod
def _ensure_host_dir(path: str) -> str:
    """Creates a fresh host directory to back a container path.

    Args:
        path: Directory path; only its basename is used in the host name.

    Returns:
        Path of the created directory, located under the system temp
        directory and named ``sandbox_<basename>_<8 hex chars>``.
    """
    random_suffix = os.urandom(4).hex()
    dir_name = f"sandbox_{os.path.basename(path)}_{random_suffix}"
    host_path = os.path.join(tempfile.gettempdir(), dir_name)
    os.makedirs(host_path, exist_ok=True)
    return host_path
async def run_command(self, cmd: str, timeout: Optional[int] = None) -> str:
    """Runs a command through the sandbox terminal.

    Args:
        cmd: Command to execute.
        timeout: Timeout in seconds; ``config.timeout`` is used when it is
            omitted or falsy.

    Returns:
        Command output as string.

    Raises:
        RuntimeError: If sandbox not initialized.
        SandboxTimeoutError: If the terminal raises TimeoutError.
    """
    terminal = self.terminal
    if not terminal:
        raise RuntimeError("Sandbox not initialized")

    try:
        output = await terminal.run_command(
            cmd, timeout=timeout or self.config.timeout
        )
    except TimeoutError:
        raise SandboxTimeoutError(
            f"Command execution timed out after {timeout or self.config.timeout} seconds"
        )
    return output
async def read_file(self, path: str) -> str:
    """Reads a file from the container and decodes it as UTF-8.

    Args:
        path: File path, resolved through ``_safe_resolve_path``.

    Returns:
        File contents as string.

    Raises:
        FileNotFoundError: If Docker reports the path as not found.
        RuntimeError: If the sandbox is not initialized or any other step
            of the read fails.
    """
    if not self.container:
        raise RuntimeError("Sandbox not initialized")

    try:
        container_path = self._safe_resolve_path(path)
        # get_archive returns the file wrapped in a tar stream plus stat info
        archive_stream, _stat = await asyncio.to_thread(
            self.container.get_archive, container_path
        )
        raw = await self._read_from_tar(archive_stream)
        return raw.decode("utf-8")
    except NotFound:
        raise FileNotFoundError(f"File not found: {path}")
    except Exception as e:
        raise RuntimeError(f"Failed to read file: {e}")
async def write_file(self, path: str, content: str) -> None:
    """Writes content to a file in the container.

    The parent directory is created first, then the content is uploaded as
    a single-member tar archive, UTF-8 encoded.

    Args:
        path: Target path, resolved through ``_safe_resolve_path``.
        content: File content.

    Raises:
        RuntimeError: If the sandbox is not initialized or the write
            operation fails.
    """
    import shlex  # local import: only needed to build the mkdir command

    if not self.container:
        raise RuntimeError("Sandbox not initialized")

    try:
        resolved_path = self._safe_resolve_path(path)
        parent_dir = os.path.dirname(resolved_path)

        # Quote the directory: it is interpolated into a shell command and
        # may contain spaces or shell metacharacters.
        if parent_dir:
            await self.run_command(f"mkdir -p {shlex.quote(parent_dir)}")

        # Prepare file data
        tar_stream = await self._create_tar_stream(
            os.path.basename(path), content.encode("utf-8")
        )

        # Upload next to the target; "/" when the path has no directory part
        await asyncio.to_thread(
            self.container.put_archive, parent_dir or "/", tar_stream
        )
    except Exception as e:
        raise RuntimeError(f"Failed to write file: {e}") from e
def _safe_resolve_path(self, path: str) -> str:
    """Maps a caller-supplied path to a container path.

    Args:
        path: Original path.

    Returns:
        ``path`` unchanged when it is absolute, otherwise ``path`` joined
        onto the configured working directory.

    Raises:
        ValueError: If any "/"-separated segment of ``path`` is "..".
    """
    segments = path.split("/")
    if any(segment == ".." for segment in segments):
        raise ValueError("Path contains potentially unsafe patterns")

    if os.path.isabs(path):
        return path
    return os.path.join(self.config.work_dir, path)
async def copy_from(self, src_path: str, dst_path: str) -> None:
    """Copies a file or directory out of the container.

    Args:
        src_path: Source file path (container).
        dst_path: Destination path (host). If it is an existing directory
            the whole archive is extracted into it; otherwise it is written
            as a single file.

    Raises:
        FileNotFoundError: If Docker reports the source as not found.
        RuntimeError: If the copy fails for any other reason.
    """
    try:
        # Make sure the destination's parent directory exists
        dst_parent = os.path.dirname(dst_path)
        if dst_parent:
            os.makedirs(dst_parent, exist_ok=True)

        # Fetch the source as a tar stream
        container_src = self._safe_resolve_path(src_path)
        stream, _stat = await asyncio.to_thread(
            self.container.get_archive, container_src
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Spool the stream to disk so tarfile can read it
            tar_path = os.path.join(tmp_dir, "temp.tar")
            with open(tar_path, "wb") as spool:
                spool.writelines(stream)

            with tarfile.open(tar_path) as tar:
                members = tar.getmembers()
                if not members:
                    raise FileNotFoundError(f"Source file is empty: {src_path}")

                # Directory destination: keep the archive's relative layout
                if os.path.isdir(dst_path):
                    tar.extractall(dst_path)
                    return

                # File destination: the archive must hold exactly one member
                if len(members) > 1:
                    raise RuntimeError(
                        f"Source path is a directory but destination is a file: {src_path}"
                    )

                with open(dst_path, "wb") as dst:
                    payload = tar.extractfile(members[0])
                    if payload is None:
                        raise RuntimeError(f"Failed to extract file: {src_path}")
                    dst.write(payload.read())
    except docker.errors.NotFound:
        raise FileNotFoundError(f"Source file not found: {src_path}")
    except Exception as e:
        raise RuntimeError(f"Failed to copy file: {e}")
async def copy_to(self, src_path: str, dst_path: str) -> None:
    """Copies a file or directory into the container.

    The source is packed into a temporary tar archive whose member names
    start with the basename of ``dst_path``, and the archive is uploaded
    into the destination's parent directory.

    Args:
        src_path: Source file path (host).
        dst_path: Destination path (container).

    Raises:
        FileNotFoundError: If source file does not exist.
        RuntimeError: If copy operation fails.
    """
    import shlex  # local import: only needed to build shell commands

    try:
        if not os.path.exists(src_path):
            raise FileNotFoundError(f"Source file not found: {src_path}")

        # Create destination directory in container. Paths are quoted
        # because they are interpolated into shell commands.
        resolved_dst = self._safe_resolve_path(dst_path)
        container_dir = os.path.dirname(resolved_dst)
        if container_dir:
            await self.run_command(f"mkdir -p {shlex.quote(container_dir)}")

        # Build the tar archive to upload
        with tempfile.TemporaryDirectory() as tmp_dir:
            tar_path = os.path.join(tmp_dir, "temp.tar")
            with tarfile.open(tar_path, "w") as tar:
                if os.path.isdir(src_path):
                    # Directory: add every file beneath it, rooted at the
                    # destination's basename.
                    for root, _, files in os.walk(src_path):
                        for file in files:
                            file_path = os.path.join(root, file)
                            arcname = os.path.join(
                                os.path.basename(dst_path),
                                os.path.relpath(file_path, src_path),
                            )
                            tar.add(file_path, arcname=arcname)
                else:
                    # Single file, stored under the destination's basename
                    tar.add(src_path, arcname=os.path.basename(dst_path))

            with open(tar_path, "rb") as f:
                data = f.read()

        # Upload to container
        await asyncio.to_thread(
            self.container.put_archive,
            container_dir or "/",
            data,
        )

        # NOTE(review): this only fails if run_command raises; whether a
        # non-zero `test` exit status raises is not visible from this
        # method, so confirm the check is effective.
        try:
            await self.run_command(f"test -e {shlex.quote(resolved_dst)}")
        except Exception:
            raise RuntimeError(f"Failed to verify file creation: {dst_path}")
    except FileNotFoundError:
        raise
    except Exception as e:
        raise RuntimeError(f"Failed to copy file: {e}") from e
@staticmethod
async def _create_tar_stream(name: str, content: bytes) -> io.BytesIO:
    """Packs one in-memory file into a tar archive.

    Args:
        name: Member name to use inside the archive.
        content: Raw bytes of the member.

    Returns:
        In-memory tar archive, rewound to position 0.
    """
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w") as archive:
        entry = tarfile.TarInfo(name=name)
        entry.size = len(content)
        archive.addfile(entry, io.BytesIO(content))
    # Rewind so the consumer reads the archive from its first byte.
    buffer.seek(0)
    return buffer
@staticmethod
async def _read_from_tar(tar_stream) -> bytes:
    """Extracts the first member of a chunked tar stream.

    Args:
        tar_stream: Iterable of byte chunks forming a tar archive.

    Returns:
        Content of the archive's first member.

    Raises:
        RuntimeError: If the archive has no members or the first member
            cannot be extracted as a file.
    """
    with tempfile.NamedTemporaryFile() as spool:
        # tarfile needs a seekable file object, so spool the chunks first.
        spool.writelines(tar_stream)
        spool.seek(0)

        with tarfile.open(fileobj=spool) as archive:
            first = archive.next()
            if not first:
                raise RuntimeError("Empty tar archive")

            extracted = archive.extractfile(first)
            if not extracted:
                raise RuntimeError("Failed to extract file content")

            return extracted.read()
async def cleanup(self) -> None:
    """Releases the terminal and the container on a best-effort basis.

    Failures of individual steps are collected and printed as one warning
    instead of being raised.
    """
    problems = []
    try:
        # Step 1: close the terminal session
        if self.terminal:
            try:
                await self.terminal.close()
            except Exception as e:
                problems.append(f"Terminal cleanup error: {e}")
            finally:
                self.terminal = None

        # Step 2: stop, then force-remove, the container
        if self.container:
            try:
                await asyncio.to_thread(self.container.stop, timeout=5)
            except Exception as e:
                problems.append(f"Container stop error: {e}")

            try:
                await asyncio.to_thread(self.container.remove, force=True)
            except Exception as e:
                problems.append(f"Container remove error: {e}")
            finally:
                self.container = None
    except Exception as e:
        problems.append(f"General cleanup error: {e}")

    if problems:
        print(f"Warning: Errors during cleanup: {', '.join(problems)}")
async def __aenter__(self) -> "DockerSandbox":
    """Enters the async context by creating the sandbox container."""
    created = await self.create()
    return created
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
    """Leaves the async context by cleaning up the sandbox."""
    teardown = self.cleanup()
    await teardown

View File

@ -0,0 +1,346 @@
"""
Asynchronous Docker Terminal
This module provides asynchronous terminal functionality for Docker containers,
allowing interactive command execution with timeout control.
"""
import asyncio
import re
import socket
from typing import Dict, Optional, Tuple, Union
import docker
from docker import APIClient
from docker.errors import APIError
from docker.models.containers import Container
class DockerSession:
def __init__(self, container_id: str) -> None:
    """Prepares a session object; nothing is opened until create().

    Args:
        container_id: ID of the Docker container.
    """
    self.container_id = container_id
    # NOTE(review): APIClient() is built with default arguments, while the
    # rest of this module uses docker.from_env() - confirm both reach the
    # same daemon when DOCKER_HOST is set.
    self.api = APIClient()
    # Filled in by create(), reset by close()
    self.socket = None
    self.exec_id = None
async def create(self, working_dir: str, env_vars: Dict[str, str]) -> None:
    """Starts an interactive bash inside the container and attaches to it.

    Args:
        working_dir: Working directory inside the container.
        env_vars: Environment variables to set.

    Raises:
        RuntimeError: If the exec stream exposes no underlying socket.
    """
    # Change directory, fix the prompt to "$ " and replace the shell with a
    # bash that loads no rc or profile files.
    shell_script = (
        f"cd {working_dir} && "
        "PROMPT_COMMAND='' "
        "PS1='$ ' "
        "exec bash --norc --noprofile"
    )
    startup_command = ["bash", "-c", shell_script]
    session_env = {**env_vars, "TERM": "dumb", "PS1": "$ ", "PROMPT_COMMAND": ""}

    exec_data = self.api.exec_create(
        self.container_id,
        startup_command,
        stdin=True,
        tty=True,
        stdout=True,
        stderr=True,
        privileged=True,
        user="root",
        environment=session_env,
    )
    self.exec_id = exec_data["Id"]

    socket_data = self.api.exec_start(
        self.exec_id, socket=True, tty=True, stream=True, demux=True
    )
    if not hasattr(socket_data, "_sock"):
        raise RuntimeError("Failed to get socket connection")

    # Non-blocking mode lets the read loops poll and yield to the event loop.
    self.socket = socket_data._sock
    self.socket.setblocking(False)

    # Consume the initial prompt so the first command starts clean.
    await self._read_until_prompt()
async def close(self) -> None:
    """Cleans up session resources.

    1. Sends exit command
    2. Closes socket connection
    3. Checks and cleans up exec instance

    Ordinary errors are swallowed so cleanup always runs to the end.
    Cancellation is not swallowed, but the socket is still closed first.
    """
    try:
        if self.socket:
            try:
                # Ask bash to exit; failures here must not stop cleanup.
                try:
                    self.socket.sendall(b"exit\n")
                    # Allow time for command execution
                    await asyncio.sleep(0.1)
                except Exception:
                    pass

                try:
                    self.socket.shutdown(socket.SHUT_RDWR)
                except Exception:
                    pass  # Some platforms may not support shutdown
            finally:
                # Runs even if the sleep above is cancelled, so the file
                # descriptor is never leaked.
                self.socket.close()
                self.socket = None

        if self.exec_id:
            try:
                # Check exec instance status
                exec_inspect = self.api.exec_inspect(self.exec_id)
                if exec_inspect.get("Running", False):
                    # If still running, give it a moment to finish
                    await asyncio.sleep(0.5)
            except Exception:
                pass  # Ignore inspection errors, continue cleanup

            self.exec_id = None
    except Exception as e:
        # Report but don't raise, so callers' cleanup continues
        print(f"Warning: Error during session cleanup: {e}")
async def _read_until_prompt(self) -> str:
    """Reads output until prompt is found.

    Returns:
        String containing everything received up to and including the
        first chunk that contains the ``$ `` prompt.

    Raises:
        ConnectionError: If the peer closes the socket before a prompt
            arrives (a subclass of ``socket.error``).
        socket.error: If socket communication fails.
    """
    buffer = b""
    while b"$ " not in buffer:
        try:
            chunk = self.socket.recv(4096)
        except socket.error as e:
            # Non-blocking socket with nothing to read yet: yield and retry.
            if e.errno == socket.EWOULDBLOCK:
                await asyncio.sleep(0.1)
                continue
            raise

        if not chunk:
            # recv() returning b"" means EOF. Without this check the loop
            # would spin forever without ever yielding to the event loop.
            raise ConnectionError("Socket closed before shell prompt was received")
        buffer += chunk

    return buffer.decode("utf-8")
async def execute(self, command: str, timeout: Optional[int] = None) -> str:
    """Executes a command and returns cleaned output.

    The command is written to the interactive shell followed by
    ``echo $?``; output is then read from the socket until the unterminated
    remainder of the buffer ends with the ``$ `` prompt.

    Args:
        command: Shell command to execute.
        timeout: Maximum execution time in seconds. A falsy value disables
            the timeout.

    Returns:
        Command output as string with prompt markers removed.

    Raises:
        RuntimeError: If session not initialized or execution fails. Any
            non-timeout error raised inside the try block, including the
            ValueError from ``_sanitize_command``, is wrapped in this.
        TimeoutError: If command execution exceeds timeout.
    """
    if not self.socket:
        raise RuntimeError("Session not initialized")
    try:
        # Reject commands matching the risky-pattern list
        sanitized_command = self._sanitize_command(command)
        # The trailing "echo $?" makes the shell print the exit status.
        full_command = f"{sanitized_command}\necho $?\n"
        self.socket.sendall(full_command.encode())

        async def read_output() -> str:
            buffer = b""
            result_lines = []
            command_sent = False
            while True:
                try:
                    chunk = self.socket.recv(4096)
                    if not chunk:
                        # EOF: the peer closed the connection
                        break
                    buffer += chunk
                    # Keep the unterminated tail in `buffer`; process only
                    # the complete lines.
                    lines = buffer.split(b"\n")
                    buffer = lines[-1]
                    lines = lines[:-1]
                    for line in lines:
                        line = line.rstrip(b"\r")
                        # The first complete line received is dropped
                        # (presumably the terminal echo of the command).
                        if not command_sent:
                            command_sent = True
                            continue
                        # Drop the "echo $?" echo and the status it prints.
                        # NOTE(review): this also drops any genuine output
                        # line that consists only of digits.
                        if line.strip() == b"echo $?" or line.strip().isdigit():
                            continue
                        if line.strip():
                            result_lines.append(line)
                    # A tail ending in the prompt means the shell is idle.
                    if buffer.endswith(b"$ "):
                        break
                except socket.error as e:
                    # Non-blocking socket with no data yet: yield and retry
                    if e.errno == socket.EWOULDBLOCK:
                        await asyncio.sleep(0.1)
                        continue
                    raise
            output = b"\n".join(result_lines).decode("utf-8")
            # Strip a trailing "$ echo $..." remnant if one slipped through
            output = re.sub(r"\n\$ echo \$\$?.*$", "", output)
            return output

        if timeout:
            result = await asyncio.wait_for(read_output(), timeout)
        else:
            result = await read_output()
        return result.strip()
    except asyncio.TimeoutError:
        raise TimeoutError(f"Command execution timed out after {timeout} seconds")
    except Exception as e:
        raise RuntimeError(f"Failed to execute command: {e}")
def _sanitize_command(self, command: str) -> str:
    """Rejects commands that contain a known-destructive pattern.

    Matching is a case-insensitive substring test. This is a coarse
    deny-list, not a parser: it does not make arbitrary input safe.

    Args:
        command: Raw command string.

    Returns:
        The command string, unchanged.

    Raises:
        ValueError: If command contains potentially dangerous patterns.
    """
    risky_commands = [
        "rm -rf /",
        "rm -rf /*",
        "mkfs",
        "dd if=/dev/zero",
        ":(){:|:&};:",
        "chmod -R 777 /",
        "chown -R",
    ]

    # Lower-case BOTH sides. Comparing the raw patterns against a
    # lower-cased command meant the entries with an uppercase "-R" could
    # never match.
    lowered = command.lower()
    for risky in risky_commands:
        if risky.lower() in lowered:
            raise ValueError(
                f"Command contains potentially dangerous operation: {risky}"
            )
    return command
class AsyncDockerizedTerminal:
    """Asynchronous terminal bound to one Docker container.

    Wraps a DockerSession and adds working-directory setup and a default
    command timeout.
    """

    def __init__(
        self,
        container: Union[str, Container],
        working_dir: str = "/workspace",
        env_vars: Optional[Dict[str, str]] = None,
        default_timeout: int = 60,
    ) -> None:
        """Initializes an asynchronous terminal for Docker containers.

        Args:
            container: Docker container ID or Container object.
            working_dir: Working directory inside the container.
            env_vars: Environment variables to set.
            default_timeout: Default command execution timeout in seconds.
        """
        self.client = docker.from_env()
        # Accept either a ready Container object or an ID to look up.
        self.container = (
            container
            if isinstance(container, Container)
            else self.client.containers.get(container)
        )
        self.working_dir = working_dir
        self.env_vars = env_vars or {}
        self.default_timeout = default_timeout
        self.session = None  # created by init()

    async def init(self) -> None:
        """Initializes the terminal environment.

        Ensures working directory exists and creates an interactive session.

        Raises:
            RuntimeError: If initialization fails.
        """
        await self._ensure_workdir()
        self.session = DockerSession(self.container.id)
        await self.session.create(self.working_dir, self.env_vars)

    async def _ensure_workdir(self) -> None:
        """Ensures working directory exists in container.

        Raises:
            RuntimeError: If directory creation fails, either because the
                Docker API call errors or because ``mkdir`` exits non-zero.
        """
        try:
            exit_code, output = await self._exec_simple(
                f"mkdir -p {self.working_dir}"
            )
        except APIError as e:
            raise RuntimeError(f"Failed to create working directory: {e}")
        # A successful API call does not mean mkdir itself succeeded.
        if exit_code:
            raise RuntimeError(
                f"Failed to create working directory: {output.strip()}"
            )

    async def _exec_simple(self, cmd: str) -> Tuple[int, str]:
        """Executes a simple command using Docker's exec_run.

        Args:
            cmd: Command to execute.

        Returns:
            Tuple of (exit_code, output).
        """
        # exec_run blocks, so it runs in a worker thread.
        result = await asyncio.to_thread(
            self.container.exec_run, cmd, environment=self.env_vars
        )
        return result.exit_code, result.output.decode("utf-8")

    async def run_command(self, cmd: str, timeout: Optional[int] = None) -> str:
        """Runs a command in the container with timeout.

        Args:
            cmd: Shell command to execute.
            timeout: Maximum execution time in seconds; the default timeout
                is used when omitted or falsy.

        Returns:
            Command output as string.

        Raises:
            RuntimeError: If terminal not initialized.
        """
        if not self.session:
            raise RuntimeError("Terminal not initialized")
        return await self.session.execute(cmd, timeout=timeout or self.default_timeout)

    async def close(self) -> None:
        """Closes the terminal session."""
        if self.session:
            await self.session.close()

    async def __aenter__(self) -> "AsyncDockerizedTerminal":
        """Async context manager entry."""
        await self.init()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async context manager exit."""
        await self.close()

View File

@ -1,5 +1,6 @@
from app.tool.base import BaseTool
from app.tool.bash import Bash
from app.tool.browser_use_tool import BrowserUseTool
from app.tool.create_chat_completion import CreateChatCompletion
from app.tool.planning import PlanningTool
from app.tool.str_replace_editor import StrReplaceEditor
@ -10,6 +11,7 @@ from app.tool.tool_collection import ToolCollection
__all__ = [
"BaseTool",
"Bash",
"BrowserUseTool",
"Terminate",
"StrReplaceEditor",
"ToolCollection",

View File

@ -418,17 +418,7 @@ class BrowserUseTool(BaseTool, Generic[Context]):
# Create prompt for LLM
prompt_text = """
Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page.
Examples of extraction goals:
- Extract all company names
- Extract specific descriptions
- Extract all information about a topic
- Extract links with companies in structured format
- Extract all links
If the goal is vague, summarize the page. Respond in JSON format.
Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format.
Extraction goal: {goal}
Page content:
@ -445,10 +435,54 @@ Page content:
messages = [Message.user_message(formatted_prompt)]
# Use LLM to extract content based on the goal
response = await self.llm.ask(messages)
# Define extraction function for the tool
extraction_function = {
"type": "function",
"function": {
"name": "extract_content",
"description": "Extract specific information from a webpage based on a goal",
"parameters": {
"type": "object",
"properties": {
"extracted_content": {
"type": "object",
"description": "The content extracted from the page according to the goal",
}
},
"required": ["extracted_content"],
},
},
}
# Use LLM to extract content with required function calling
response = await self.llm.ask_tool(
messages,
tools=[extraction_function],
tool_choice="required",
)
# Extract content from function call response
if (
response
and response.tool_calls
and len(response.tool_calls) > 0
):
# Get the first tool call arguments
tool_call = response.tool_calls[0]
# Parse the JSON arguments
try:
args = json.loads(tool_call.function.arguments)
extracted_content = args.get("extracted_content", {})
# Format extracted content as JSON string
content_json = json.dumps(
extracted_content, indent=2, ensure_ascii=False
)
msg = f"Extracted from page:\n{content_json}\n"
except Exception as e:
msg = f"Error parsing extraction result: {str(e)}\nRaw response: {tool_call.function.arguments}"
else:
msg = "No content was extracted from the page."
msg = f"Extracted from page:\n{response}\n"
return ToolResult(output=msg)
except Exception as e:
# Provide a more helpful error message

156
app/tool/file_operators.py Normal file
View File

@ -0,0 +1,156 @@
"""File operation interfaces and implementations for local and sandbox environments."""
import asyncio
from pathlib import Path
from typing import Optional, Protocol, Tuple, Union, runtime_checkable
from app.config import SandboxSettings
from app.exceptions import ToolError
from app.sandbox.client import SANDBOX_CLIENT
# A filesystem path given either as plain text or as a pathlib object.
PathLike = Union[str, Path]


@runtime_checkable
class FileOperator(Protocol):
    """Structural interface shared by every file-operation backend."""

    async def read_file(self, path: PathLike) -> str:
        """Return the text content stored at ``path``."""
        ...

    async def write_file(self, path: PathLike, content: str) -> None:
        """Store ``content`` as the text of the file at ``path``."""
        ...

    async def is_directory(self, path: PathLike) -> bool:
        """Tell whether ``path`` refers to a directory."""
        ...

    async def exists(self, path: PathLike) -> bool:
        """Tell whether anything exists at ``path``."""
        ...

    async def run_command(
        self, cmd: str, timeout: Optional[float] = 120.0
    ) -> Tuple[int, str, str]:
        """Execute ``cmd`` and return (return_code, stdout, stderr)."""
        ...
class LocalFileOperator(FileOperator):
    """File operations implementation for local filesystem."""

    async def read_file(self, path: PathLike) -> str:
        """Read content from a local file.

        Raises:
            ToolError: If the file cannot be read.
        """
        try:
            return Path(path).read_text()
        except Exception as e:
            raise ToolError(f"Failed to read {path}: {str(e)}") from None

    async def write_file(self, path: PathLike, content: str) -> None:
        """Write content to a local file.

        Raises:
            ToolError: If the file cannot be written.
        """
        try:
            Path(path).write_text(content)
        except Exception as e:
            raise ToolError(f"Failed to write to {path}: {str(e)}") from None

    async def is_directory(self, path: PathLike) -> bool:
        """Check if path points to a directory."""
        return Path(path).is_dir()

    async def exists(self, path: PathLike) -> bool:
        """Check if path exists."""
        return Path(path).exists()

    async def run_command(
        self, cmd: str, timeout: Optional[float] = 120.0
    ) -> Tuple[int, str, str]:
        """Run a shell command locally.

        Args:
            cmd: Command line, executed through the shell.
            timeout: Seconds to wait for completion; None waits forever.

        Returns:
            Tuple of (return_code, stdout, stderr).

        Raises:
            TimeoutError: If the command does not finish within ``timeout``.
        """
        process = await asyncio.create_subprocess_shell(
            cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=timeout
            )
            return (
                process.returncode or 0,
                stdout.decode(),
                stderr.decode(),
            )
        except asyncio.TimeoutError as exc:
            try:
                process.kill()
                # Reap the killed child so it does not linger as a zombie
                # and its transport is released.
                await process.wait()
            except ProcessLookupError:
                pass  # the process already exited on its own
            raise TimeoutError(
                f"Command '{cmd}' timed out after {timeout} seconds"
            ) from exc
class SandboxFileOperator(FileOperator):
    """File operations implementation for sandbox environment."""

    def __init__(self):
        """Binds the operator to the shared sandbox client."""
        self.sandbox_client = SANDBOX_CLIENT

    async def _ensure_sandbox_initialized(self):
        """Ensure sandbox is initialized, creating it with defaults if not."""
        if not self.sandbox_client.sandbox:
            await self.sandbox_client.create(config=SandboxSettings())

    async def read_file(self, path: PathLike) -> str:
        """Read content from a file in sandbox.

        Raises:
            ToolError: If the file cannot be read.
        """
        await self._ensure_sandbox_initialized()
        try:
            return await self.sandbox_client.read_file(str(path))
        except Exception as e:
            raise ToolError(f"Failed to read {path} in sandbox: {str(e)}") from None

    async def write_file(self, path: PathLike, content: str) -> None:
        """Write content to a file in sandbox.

        Raises:
            ToolError: If the file cannot be written.
        """
        await self._ensure_sandbox_initialized()
        try:
            await self.sandbox_client.write_file(str(path), content)
        except Exception as e:
            raise ToolError(f"Failed to write to {path} in sandbox: {str(e)}") from None

    async def is_directory(self, path: PathLike) -> bool:
        """Check if path points to a directory in sandbox."""
        import shlex  # local import: only needed to build the shell test

        await self._ensure_sandbox_initialized()
        # Quote the path: it is interpolated into a shell command.
        result = await self.sandbox_client.run_command(
            f"test -d {shlex.quote(str(path))} && echo 'true' || echo 'false'"
        )
        return result.strip() == "true"

    async def exists(self, path: PathLike) -> bool:
        """Check if path exists in sandbox."""
        import shlex  # local import: only needed to build the shell test

        await self._ensure_sandbox_initialized()
        # Quote the path: it is interpolated into a shell command.
        result = await self.sandbox_client.run_command(
            f"test -e {shlex.quote(str(path))} && echo 'true' || echo 'false'"
        )
        return result.strip() == "true"

    async def run_command(
        self, cmd: str, timeout: Optional[float] = 120.0
    ) -> Tuple[int, str, str]:
        """Run a command in sandbox environment.

        Args:
            cmd: Command to execute.
            timeout: Seconds to wait, truncated to an int; None or 0 is
                passed on as None.

        Returns:
            Tuple of (return_code, stdout, stderr). The code is 0 with the
            command output on success, or 1 with an error message in
            stderr when execution raised.

        Raises:
            TimeoutError: If the sandbox reports a timeout.
        """
        await self._ensure_sandbox_initialized()
        try:
            stdout = await self.sandbox_client.run_command(
                cmd, timeout=int(timeout) if timeout else None
            )
            return (
                0,  # Always return 0 since we don't have explicit return code from sandbox
                stdout,
                "",  # No stderr capture in the current sandbox implementation
            )
        except TimeoutError as exc:
            raise TimeoutError(
                f"Command '{cmd}' timed out after {timeout} seconds in sandbox"
            ) from exc
        except Exception as exc:
            return 1, "", f"Error executing command in sandbox: {str(exc)}"

View File

@ -1,43 +0,0 @@
"""Utility to run shell commands asynchronously with a timeout."""
import asyncio
TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
MAX_RESPONSE_LEN: int = 16000
def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
"""Truncate content and append a notice if content exceeds the specified length."""
return (
content
if not truncate_after or len(content) <= truncate_after
else content[:truncate_after] + TRUNCATED_MESSAGE
)
async def run(
    cmd: str,
    timeout: float | None = 120.0,  # seconds
    truncate_after: int | None = MAX_RESPONSE_LEN,
):
    """Run a shell command asynchronously with a timeout.

    Args:
        cmd: Command line passed to the shell.
        timeout: Seconds to wait for completion; ``None`` waits indefinitely.
        truncate_after: Maximum length of each captured stream before it is
            clipped by ``maybe_truncate``.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded
        to text.

    Raises:
        TimeoutError: If the command does not finish within ``timeout``.
    """
    process = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    try:
        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
    except asyncio.TimeoutError as exc:
        try:
            process.kill()
        except ProcessLookupError:
            # The process exited on its own between the timeout and the kill.
            pass
        # Reap the killed child so it does not linger as a zombie and its
        # transport is released before the error propagates.
        await process.wait()
        raise TimeoutError(
            f"Command '{cmd}' timed out after {timeout} seconds"
        ) from exc
    return (
        process.returncode or 0,
        maybe_truncate(stdout.decode(), truncate_after=truncate_after),
        maybe_truncate(stderr.decode(), truncate_after=truncate_after),
    )

View File

@ -1,5 +1,6 @@
from app.tool.search.baidu_search import BaiduSearchEngine
from app.tool.search.base import WebSearchEngine
from app.tool.search.bing_search import BingSearchEngine
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
from app.tool.search.google_search import GoogleSearchEngine
@ -9,4 +10,5 @@ __all__ = [
"BaiduSearchEngine",
"DuckDuckGoSearchEngine",
"GoogleSearchEngine",
"BingSearchEngine",
]

View File

@ -0,0 +1,146 @@
from typing import List
import requests
from bs4 import BeautifulSoup
from app.logger import logger
from app.tool.search.base import WebSearchEngine
# Maximum number of characters kept from a result's abstract text.
ABSTRACT_MAX_LENGTH = 300
# Pool of browser/crawler User-Agent strings; only the first entry is
# referenced below when building HEADERS.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3",
    "Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866",
]
# Default HTTP headers installed on the search session.
HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": USER_AGENTS[0],
    "Referer": "https://www.bing.com/",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
# Base used to turn the relative next-page link into an absolute URL.
BING_HOST_URL = "https://www.bing.com"
# Search endpoint; the query text is appended after ``q=``.
BING_SEARCH_URL = "https://www.bing.com/search?q="
class BingSearchEngine(WebSearchEngine):
    """Search engine backend that scrapes Bing's HTML result pages."""

    session: requests.Session = None

    def __init__(self, **data):
        """Initialize the BingSearch tool with a requests session."""
        super().__init__(**data)
        self.session = requests.Session()
        self.session.headers.update(HEADERS)

    def _search_sync(self, query: str, num_results: int = 10) -> List[str]:
        """
        Synchronous Bing search implementation to retrieve a list of URLs matching a query.

        Args:
            query (str): The search query to submit to Bing. Must not be empty.
            num_results (int, optional): The maximum number of URLs to return. Defaults to 10.

        Returns:
            List[str]: A list of URLs from the search results, capped at `num_results`.
                Returns an empty list if the query is empty or no results are found.

        Notes:
            - Pagination is handled by incrementing the `first` parameter and following `next_url` links.
            - If fewer results than `num_results` are available, all found URLs are returned.
        """
        from urllib.parse import quote_plus

        if not query:
            return []

        list_result = []
        first = 1
        # Percent-encode the query so spaces, '&', '#', non-ASCII text, etc.
        # cannot corrupt the request URL.
        next_url = BING_SEARCH_URL + quote_plus(query)

        while len(list_result) < num_results:
            data, next_url = self._parse_html(
                next_url, rank_start=len(list_result), first=first
            )
            if data:
                list_result.extend(item["url"] for item in data)
            if not next_url:
                break
            first += 10

        return list_result[:num_results]

    def _parse_html(self, url: str, rank_start: int = 0, first: int = 1) -> tuple:
        """
        Parse Bing search result HTML synchronously to extract search results and the next page URL.

        Args:
            url (str): The URL of the Bing search results page to parse.
            rank_start (int, optional): The starting rank for numbering the search results. Defaults to 0.
            first (int, optional): Unused parameter (possibly legacy). Defaults to 1.

        Returns:
            tuple: A tuple containing:
                - list: A list of dictionaries with keys 'title', 'abstract', 'url', and 'rank' for each result.
                - str or None: The URL of the next results page, or None if there is no next page.

            Any error while fetching or parsing is logged as a warning and
            reported as ``([], None)``.
        """
        from urllib.parse import urljoin

        # Upper bound (seconds) for one page fetch; without it a stalled
        # connection would block the search indefinitely.
        request_timeout = 10

        try:
            res = self.session.get(url=url, timeout=request_timeout)
            res.encoding = "utf-8"
            root = BeautifulSoup(res.text, "lxml")

            list_data = []
            ol_results = root.find("ol", id="b_results")
            if not ol_results:
                return [], None

            for li in ol_results.find_all("li", class_="b_algo"):
                title = ""
                result_url = ""  # distinct name: must not shadow the page `url`
                abstract = ""
                try:
                    h2 = li.find("h2")
                    if h2:
                        title = h2.text.strip()
                        result_url = h2.a["href"].strip()

                    p = li.find("p")
                    if p:
                        abstract = p.text.strip()

                    if ABSTRACT_MAX_LENGTH and len(abstract) > ABSTRACT_MAX_LENGTH:
                        abstract = abstract[:ABSTRACT_MAX_LENGTH]
                except Exception:
                    # Malformed entry (e.g. heading without a link): skip it.
                    continue

                if not result_url:
                    # An entry without a link is useless to callers, which
                    # only consume the URLs.
                    continue

                rank_start += 1
                list_data.append(
                    {
                        "title": title,
                        "abstract": abstract,
                        "url": result_url,
                        "rank": rank_start,
                    }
                )

            next_btn = root.find("a", title="Next page")
            if not next_btn:
                return list_data, None

            # urljoin copes with both relative and absolute hrefs.
            next_url = urljoin(BING_HOST_URL, next_btn["href"])
            return list_data, next_url
        except Exception as e:
            logger.warning(f"Error parsing HTML: {e}")
            return [], None

    def perform_search(self, query, num_results=10, *args, **kwargs):
        """Bing search engine."""
        return self._search_sync(query, num_results=num_results)

View File

@ -1,11 +1,19 @@
"""File and directory manipulation tool with sandbox support."""
from collections import defaultdict
from pathlib import Path
from typing import Literal, get_args
from typing import Any, DefaultDict, List, Literal, Optional, get_args
from app.config import config
from app.exceptions import ToolError
from app.tool import BaseTool
from app.tool.base import CLIResult, ToolResult
from app.tool.run import run
from app.tool.file_operators import (
FileOperator,
LocalFileOperator,
PathLike,
SandboxFileOperator,
)
Command = Literal[
@ -15,12 +23,17 @@ Command = Literal[
"insert",
"undo_edit",
]
# Constants
SNIPPET_LINES: int = 4
MAX_RESPONSE_LEN: int = 16000
TRUNCATED_MESSAGE: str = (
"<response clipped><NOTE>To save on context only part of this file has been shown to you. "
"You should retry this tool after you have searched inside the file with `grep -n` "
"in order to find the line numbers of what you are looking for.</NOTE>"
)
TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
# Tool description
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
* State is persistent across command calls and discussions with the user
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
@ -35,17 +48,17 @@ Notes for using the `str_replace` command:
"""
def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
def maybe_truncate(
content: str, truncate_after: Optional[int] = MAX_RESPONSE_LEN
) -> str:
"""Truncate content and append a notice if content exceeds the specified length."""
return (
content
if not truncate_after or len(content) <= truncate_after
else content[:truncate_after] + TRUNCATED_MESSAGE
)
if not truncate_after or len(content) <= truncate_after:
return content
return content[:truncate_after] + TRUNCATED_MESSAGE
class StrReplaceEditor(BaseTool):
"""A tool for executing bash commands"""
"""A tool for viewing, creating, and editing files with sandbox support."""
name: str = "str_replace_editor"
description: str = _STR_REPLACE_EDITOR_DESCRIPTION
@ -85,8 +98,18 @@ class StrReplaceEditor(BaseTool):
},
"required": ["command", "path"],
}
_file_history: DefaultDict[PathLike, List[str]] = defaultdict(list)
_local_operator: LocalFileOperator = LocalFileOperator()
_sandbox_operator: SandboxFileOperator = SandboxFileOperator()
_file_history: list = defaultdict(list)
# def _get_operator(self, use_sandbox: bool) -> FileOperator:
def _get_operator(self) -> FileOperator:
    """Pick the file operator matching the configured execution mode.

    Returns the sandbox operator when ``config.sandbox.use_sandbox`` is
    truthy, otherwise the local one.
    """
    if config.sandbox.use_sandbox:
        return self._sandbox_operator
    return self._local_operator
async def execute(
self,
@ -98,24 +121,30 @@ class StrReplaceEditor(BaseTool):
old_str: str | None = None,
new_str: str | None = None,
insert_line: int | None = None,
**kwargs,
**kwargs: Any,
) -> str:
_path = Path(path)
self.validate_path(command, _path)
"""Execute a file operation command."""
# Get the appropriate file operator
operator = self._get_operator()
# Validate path and command combination
await self.validate_path(command, Path(path), operator)
# Execute the appropriate command
if command == "view":
result = await self.view(_path, view_range)
result = await self.view(path, view_range, operator)
elif command == "create":
if file_text is None:
raise ToolError("Parameter `file_text` is required for command: create")
self.write_file(_path, file_text)
self._file_history[_path].append(file_text)
result = ToolResult(output=f"File created successfully at: {_path}")
await operator.write_file(path, file_text)
self._file_history[path].append(file_text)
result = ToolResult(output=f"File created successfully at: {path}")
elif command == "str_replace":
if old_str is None:
raise ToolError(
"Parameter `old_str` is required for command: str_replace"
)
result = self.str_replace(_path, old_str, new_str)
result = await self.str_replace(path, old_str, new_str, operator)
elif command == "insert":
if insert_line is None:
raise ToolError(
@ -123,89 +152,145 @@ class StrReplaceEditor(BaseTool):
)
if new_str is None:
raise ToolError("Parameter `new_str` is required for command: insert")
result = self.insert(_path, insert_line, new_str)
result = await self.insert(path, insert_line, new_str, operator)
elif command == "undo_edit":
result = self.undo_edit(_path)
result = await self.undo_edit(path, operator)
else:
# This should be caught by type checking, but we include it for safety
raise ToolError(
f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}'
)
return str(result)
def validate_path(self, command: str, path: Path):
"""
Check that the path/command combination is valid.
"""
# Check if its an absolute path
async def validate_path(
self, command: str, path: Path, operator: FileOperator
) -> None:
"""Validate path and command combination based on execution environment."""
# Check if path is absolute
if not path.is_absolute():
raise ToolError(f"The path {path} is not an absolute path")
# Check if path exists
if not path.exists() and command != "create":
# Only check if path exists for non-create commands
if command != "create":
if not await operator.exists(path):
raise ToolError(
f"The path {path} does not exist. Please provide a valid path."
)
if path.exists() and command == "create":
raise ToolError(
f"File already exists at: {path}. Cannot overwrite files using command `create`."
)
# Check if the path points to a directory
if path.is_dir():
if command != "view":
# Check if path is a directory
is_dir = await operator.is_directory(path)
if is_dir and command != "view":
raise ToolError(
f"The path {path} is a directory and only the `view` command can be used on directories"
)
async def view(self, path: Path, view_range: list[int] | None = None):
"""Implement the view command"""
if path.is_dir():
# Check if file exists for create command
elif command == "create":
exists = await operator.exists(path)
if exists:
raise ToolError(
f"File already exists at: {path}. Cannot overwrite files using command `create`."
)
async def view(
self,
path: PathLike,
view_range: Optional[List[int]] = None,
operator: FileOperator = None,
) -> CLIResult:
"""Display file or directory content."""
# Determine if path is a directory
is_dir = await operator.is_directory(path)
if is_dir:
# Directory handling
if view_range:
raise ToolError(
"The `view_range` parameter is not allowed when `path` points to a directory."
)
_, stdout, stderr = await run(
rf"find {path} -maxdepth 2 -not -path '*/\.*'"
)
return await self._view_directory(path, operator)
else:
# File handling
return await self._view_file(path, operator, view_range)
@staticmethod
async def _view_directory(path: PathLike, operator: FileOperator) -> CLIResult:
"""Display directory contents."""
find_cmd = f"find {path} -maxdepth 2 -not -path '*/\\.*'"
# Execute command using the operator
returncode, stdout, stderr = await operator.run_command(find_cmd)
if not stderr:
stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n"
stdout = (
f"Here's the files and directories up to 2 levels deep in {path}, "
f"excluding hidden items:\n{stdout}\n"
)
return CLIResult(output=stdout, error=stderr)
file_content = self.read_file(path)
async def _view_file(
self,
path: PathLike,
operator: FileOperator,
view_range: Optional[List[int]] = None,
) -> CLIResult:
"""Display file content, optionally within a specified line range."""
# Read file content
file_content = await operator.read_file(path)
init_line = 1
# Apply view range if specified
if view_range:
if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
raise ToolError(
"Invalid `view_range`. It should be a list of two integers."
)
file_lines = file_content.split("\n")
n_lines_file = len(file_lines)
init_line, final_line = view_range
# Validate view range
if init_line < 1 or init_line > n_lines_file:
raise ToolError(
f"Invalid `view_range`: {view_range}. Its first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
f"Invalid `view_range`: {view_range}. Its first element `{init_line}` should be "
f"within the range of lines of the file: {[1, n_lines_file]}"
)
if final_line > n_lines_file:
raise ToolError(
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be "
f"smaller than the number of lines in the file: `{n_lines_file}`"
)
if final_line != -1 and final_line < init_line:
raise ToolError(
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be larger or equal than its first `{init_line}`"
f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be "
f"larger or equal than its first `{init_line}`"
)
# Apply range
if final_line == -1:
file_content = "\n".join(file_lines[init_line - 1 :])
else:
file_content = "\n".join(file_lines[init_line - 1 : final_line])
# Format and return result
return CLIResult(
output=self._make_output(file_content, str(path), init_line=init_line)
)
def str_replace(self, path: Path, old_str: str, new_str: str | None):
"""Implement the str_replace command, which replaces old_str with new_str in the file content"""
# Read the file content
file_content = self.read_file(path).expandtabs()
async def str_replace(
self,
path: PathLike,
old_str: str,
new_str: Optional[str] = None,
operator: FileOperator = None,
) -> CLIResult:
"""Replace a unique string in a file with a new string."""
# Read file content and expand tabs
file_content = (await operator.read_file(path)).expandtabs()
old_str = old_str.expandtabs()
new_str = new_str.expandtabs() if new_str is not None else ""
@ -216,6 +301,7 @@ class StrReplaceEditor(BaseTool):
f"No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}."
)
elif occurrences > 1:
# Find line numbers of occurrences
file_content_lines = file_content.split("\n")
lines = [
idx + 1
@ -223,16 +309,17 @@ class StrReplaceEditor(BaseTool):
if old_str in line
]
raise ToolError(
f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique"
f"No replacement was performed. Multiple occurrences of old_str `{old_str}` "
f"in lines {lines}. Please ensure it is unique"
)
# Replace old_str with new_str
new_file_content = file_content.replace(old_str, new_str)
# Write the new content to the file
self.write_file(path, new_file_content)
await operator.write_file(path, new_file_content)
# Save the content to history
# Save the original content to history
self._file_history[path].append(file_content)
# Create a snippet of the edited section
@ -250,36 +337,50 @@ class StrReplaceEditor(BaseTool):
return CLIResult(output=success_msg)
def insert(self, path: Path, insert_line: int, new_str: str):
"""Implement the insert command, which inserts new_str at the specified line in the file content."""
file_text = self.read_file(path).expandtabs()
async def insert(
self,
path: PathLike,
insert_line: int,
new_str: str,
operator: FileOperator = None,
) -> CLIResult:
"""Insert text at a specific line in a file."""
# Read and prepare content
file_text = (await operator.read_file(path)).expandtabs()
new_str = new_str.expandtabs()
file_text_lines = file_text.split("\n")
n_lines_file = len(file_text_lines)
# Validate insert_line
if insert_line < 0 or insert_line > n_lines_file:
raise ToolError(
f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}"
f"Invalid `insert_line` parameter: {insert_line}. It should be within "
f"the range of lines of the file: {[0, n_lines_file]}"
)
# Perform insertion
new_str_lines = new_str.split("\n")
new_file_text_lines = (
file_text_lines[:insert_line]
+ new_str_lines
+ file_text_lines[insert_line:]
)
# Create a snippet for preview
snippet_lines = (
file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
+ new_str_lines
+ file_text_lines[insert_line : insert_line + SNIPPET_LINES]
)
# Join lines and write to file
new_file_text = "\n".join(new_file_text_lines)
snippet = "\n".join(snippet_lines)
self.write_file(path, new_file_text)
await operator.write_file(path, new_file_text)
self._file_history[path].append(file_text)
# Prepare success message
success_msg = f"The file {path} has been edited. "
success_msg += self._make_output(
snippet,
@ -287,51 +388,43 @@ class StrReplaceEditor(BaseTool):
max(1, insert_line - SNIPPET_LINES + 1),
)
success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
return CLIResult(output=success_msg)
def undo_edit(self, path: Path):
"""Implement the undo_edit command."""
async def undo_edit(
self, path: PathLike, operator: FileOperator = None
) -> CLIResult:
"""Revert the last edit made to a file."""
if not self._file_history[path]:
raise ToolError(f"No edit history found for {path}.")
old_text = self._file_history[path].pop()
self.write_file(path, old_text)
await operator.write_file(path, old_text)
return CLIResult(
output=f"Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}"
)
def read_file(self, path: Path):
"""Read the content of a file from a given path; raise a ToolError if an error occurs."""
try:
return path.read_text()
except Exception as e:
raise ToolError(f"Ran into {e} while trying to read {path}") from None
def write_file(self, path: Path, file: str):
"""Write the content of a file to a given path; raise a ToolError if an error occurs."""
try:
path.write_text(file)
except Exception as e:
raise ToolError(f"Ran into {e} while trying to write to {path}") from None
def _make_output(
self,
file_content: str,
file_descriptor: str,
init_line: int = 1,
expand_tabs: bool = True,
):
"""Generate output for the CLI based on the content of a file."""
) -> str:
"""Format file content for display with line numbers."""
file_content = maybe_truncate(file_content)
if expand_tabs:
file_content = file_content.expandtabs()
# Add line numbers to each line
file_content = "\n".join(
[
f"{i + init_line:6}\t{line}"
for i, line in enumerate(file_content.split("\n"))
]
)
return (
f"Here's the result of running `cat -n` on {file_descriptor}:\n"
+ file_content

View File

@ -7,6 +7,7 @@ from app.config import config
from app.tool.base import BaseTool
from app.tool.search import (
BaiduSearchEngine,
BingSearchEngine,
DuckDuckGoSearchEngine,
GoogleSearchEngine,
WebSearchEngine,
@ -37,6 +38,7 @@ class WebSearch(BaseTool):
"google": GoogleSearchEngine(),
"baidu": BaiduSearchEngine(),
"duckduckgo": DuckDuckGoSearchEngine(),
"bing": BingSearchEngine(),
}
async def execute(self, query: str, num_results: int = 10) -> List[str]:

View File

@ -65,3 +65,13 @@ temperature = 0.0 # Controls randomness for vision mod
# [search]
# Search engine for agent to use. Default is "Google", can be set to "Baidu", "DuckDuckGo" or "Bing".
#engine = "Google"
## Sandbox configuration
#[sandbox]
#use_sandbox = false
#image = "python:3.12-slim"
#work_dir = "/workspace"
#memory_limit = "1g" # 512m
#cpu_limit = 2.0
#timeout = 300
#network_enabled = true

View File

@ -23,3 +23,7 @@ aiofiles~=24.1.0
pydantic_core~=2.27.2
colorama~=0.4.6
playwright~=1.50.0
docker~=7.1.0
pytest~=8.3.5
pytest-asyncio~=0.25.3

View File

@ -0,0 +1,110 @@
import tempfile
from pathlib import Path
from typing import AsyncGenerator
import pytest
import pytest_asyncio
from app.config import SandboxSettings
from app.sandbox.client import LocalSandboxClient, create_sandbox_client
@pytest_asyncio.fixture(scope="function")
async def local_client() -> AsyncGenerator[LocalSandboxClient, None]:
    """Yield a sandbox client per test and clean it up afterwards."""
    sandbox_client = create_sandbox_client()
    try:
        yield sandbox_client
    finally:
        # Runs even if the consumer fails, so the sandbox is always released.
        await sandbox_client.cleanup()
@pytest.fixture(scope="function")
def temp_dir() -> Path:
    """Yield a throwaway directory that is removed once the test finishes."""
    scratch = tempfile.TemporaryDirectory()
    with scratch as dir_name:
        yield Path(dir_name)
@pytest.mark.asyncio
async def test_sandbox_creation(local_client: LocalSandboxClient):
    """Tests sandbox creation with specific configuration."""
    config = SandboxSettings(
        image="python:3.12-slim",
        work_dir="/workspace",
        memory_limit="512m",
        cpu_limit=0.5,
    )

    await local_client.create(config)
    result = await local_client.run_command("python3 --version")
    # The expected interpreter version must match the configured image tag.
    assert "Python 3.12" in result
@pytest.mark.asyncio
async def test_local_command_execution(local_client: LocalSandboxClient):
    """Checks a simple echo round-trip and that a too-slow command raises."""
    await local_client.create()

    echoed = await local_client.run_command("echo 'test'")
    assert echoed.strip() == "test"

    # A 10 second sleep cannot finish within the 1 second timeout.
    with pytest.raises(Exception):
        await local_client.run_command("sleep 10", timeout=1)
@pytest.mark.asyncio
async def test_local_file_operations(local_client: LocalSandboxClient, temp_dir: Path):
    """Exercises write/read plus copying files to and from the sandbox."""
    await local_client.create()

    # Write a file inside the sandbox and read it back.
    expected_text = "Hello, World!"
    sandbox_file = "/workspace/test.txt"
    await local_client.write_file(sandbox_file, expected_text)
    read_back = await local_client.read_file(sandbox_file)
    assert read_back.strip() == expected_text

    # Host -> sandbox copy.
    host_source = temp_dir / "src.txt"
    host_source.write_text("Copy to container")
    await local_client.copy_to(str(host_source), "/workspace/copied.txt")
    copied_text = await local_client.read_file("/workspace/copied.txt")
    assert copied_text.strip() == "Copy to container"

    # Sandbox -> host copy.
    host_target = temp_dir / "dst.txt"
    await local_client.copy_from(sandbox_file, str(host_target))
    assert host_target.read_text().strip() == expected_text
@pytest.mark.asyncio
async def test_local_volume_binding(local_client: LocalSandboxClient, temp_dir: Path):
    """Checks that a host directory bound to /data is readable in the sandbox."""
    await local_client.create(volume_bindings={str(temp_dir): "/data"})

    # Written on the host after creation; read through the bound path.
    host_file = temp_dir / "test.txt"
    host_file.write_text("Volume test")

    seen_in_sandbox = await local_client.read_file("/data/test.txt")
    assert "Volume test" in seen_in_sandbox
@pytest.mark.asyncio
async def test_local_error_handling(local_client: LocalSandboxClient):
    """Checks that accessing missing files raises a 'not found' error."""
    await local_client.create()

    # Reading a file that does not exist.
    with pytest.raises(Exception) as read_failure:
        await local_client.read_file("/nonexistent.txt")
    read_message = str(read_failure.value).lower()
    assert "not found" in read_message

    # Copying out a file that does not exist.
    with pytest.raises(Exception) as copy_failure:
        await local_client.copy_from("/nonexistent.txt", "local.txt")
    copy_message = str(copy_failure.value).lower()
    assert "not found" in copy_message
if __name__ == "__main__":
pytest.main(["-v", __file__])

View File

@ -0,0 +1,104 @@
"""Tests for the AsyncDockerizedTerminal implementation."""
import docker
import pytest
import pytest_asyncio
from app.sandbox.core.terminal import AsyncDockerizedTerminal
@pytest.fixture(scope="module")
def docker_client():
    """Provide a Docker client built from the environment configuration."""
    client = docker.from_env()
    return client
@pytest_asyncio.fixture(scope="module")
async def docker_container(docker_client):
    """Start a detached test container and stop it once the module is done."""
    run_options = {"name": "test_container", "detach": True, "remove": True}
    test_container = docker_client.containers.run(
        "python:3.12-slim",
        "tail -f /dev/null",
        **run_options,
    )
    yield test_container
    test_container.stop()
@pytest_asyncio.fixture
async def terminal(docker_container):
    """Provide an initialized AsyncDockerizedTerminal and close it afterwards."""
    settings = {
        "working_dir": "/workspace",
        "env_vars": {"TEST_VAR": "test_value"},
        "default_timeout": 30,
    }
    session_terminal = AsyncDockerizedTerminal(docker_container, **settings)
    await session_terminal.init()
    yield session_terminal
    await session_terminal.close()
class TestAsyncDockerizedTerminal:
    """Test cases for AsyncDockerizedTerminal."""

    @pytest.mark.asyncio
    async def test_basic_command_execution(self, terminal):
        """Test basic command execution functionality."""
        result = await terminal.run_command("echo 'Hello World'")
        assert "Hello World" in result

    @pytest.mark.asyncio
    async def test_environment_variables(self, terminal):
        """Test environment variable setting and access."""
        result = await terminal.run_command("echo $TEST_VAR")
        assert "test_value" in result

    @pytest.mark.asyncio
    async def test_working_directory(self, terminal):
        """Test working directory setup."""
        result = await terminal.run_command("pwd")
        # Strip surrounding whitespace so a trailing newline in the command
        # output cannot fail the comparison.
        assert result.strip() == "/workspace"

    @pytest.mark.asyncio
    async def test_command_timeout(self, docker_container):
        """Test command timeout functionality."""
        terminal = AsyncDockerizedTerminal(docker_container, default_timeout=1)
        await terminal.init()
        try:
            # "sleep 5" cannot finish within the 1 second default timeout.
            with pytest.raises(TimeoutError):
                await terminal.run_command("sleep 5")
        finally:
            await terminal.close()

    @pytest.mark.asyncio
    async def test_multiple_commands(self, terminal):
        """Test execution of multiple commands in sequence."""
        cmd1 = await terminal.run_command("echo 'First'")
        cmd2 = await terminal.run_command("echo 'Second'")
        assert "First" in cmd1
        assert "Second" in cmd2

    @pytest.mark.asyncio
    async def test_session_cleanup(self, docker_container):
        """Test proper cleanup of resources."""
        terminal = AsyncDockerizedTerminal(docker_container)
        await terminal.init()
        assert terminal.session is not None
        await terminal.close()
        # Verify session is properly cleaned up
        # Note: session object still exists, but internal connection is closed
        assert terminal.session is not None
# Configure pytest-asyncio
def pytest_configure(config):
    """Register the pytest-asyncio ini settings on ``config``.

    NOTE(review): pytest normally collects hook functions from conftest.py
    files and plugins; confirm this hook is actually invoked when it lives
    in a test module.
    """
    asyncio_settings = (
        ("asyncio_mode", "strict"),
        ("asyncio_default_fixture_loop_scope", "function"),
    )
    for ini_name, ini_value in asyncio_settings:
        config.addinivalue_line(ini_name, ini_value)
config.addinivalue_line("asyncio_default_fixture_loop_scope", "function")
if __name__ == "__main__":
pytest.main(["-v", __file__])

View File

@ -0,0 +1,152 @@
import pytest
import pytest_asyncio
from app.sandbox.core.sandbox import DockerSandbox, SandboxSettings
@pytest.fixture(scope="module")
def sandbox_config():
    """Build the sandbox settings shared by the tests in this module."""
    settings = {
        "image": "python:3.12-slim",
        "work_dir": "/workspace",
        "memory_limit": "1g",
        "cpu_limit": 0.5,
        "network_enabled": True,
    }
    return SandboxSettings(**settings)
@pytest_asyncio.fixture(scope="module")
async def sandbox(sandbox_config):
    """Create a Docker sandbox for the module and clean it up at the end."""
    test_sandbox = DockerSandbox(sandbox_config)
    await test_sandbox.create()
    try:
        yield test_sandbox
    finally:
        # Always release the sandbox, even if the consumer failed.
        await test_sandbox.cleanup()
@pytest.mark.asyncio
async def test_sandbox_working_directory(sandbox):
    """Checks that commands start in the configured working directory."""
    current_dir = (await sandbox.terminal.run_command("pwd")).strip()
    assert current_dir == "/workspace"
@pytest.mark.asyncio
async def test_sandbox_file_operations(sandbox):
    """Checks that a file written to the sandbox reads back unchanged."""
    target = "/workspace/test.txt"
    expected_text = "Hello from sandbox!"

    # Write, then read the same path back.
    await sandbox.write_file(target, expected_text)
    read_back = await sandbox.read_file(target)

    assert read_back.strip() == expected_text
@pytest.mark.asyncio
async def test_sandbox_python_execution(sandbox):
    """Checks that a Python script can run in the sandbox and read a file."""
    # Data file consumed by the script below.
    await sandbox.write_file("/workspace/test.txt", "Hello from file!")

    # Script that prints a greeting followed by the data file's content.
    script_source = """
print("Hello from Python!")
with open('/workspace/test.txt') as f:
    print(f.read())
"""
    await sandbox.write_file("/workspace/test.py", script_source)

    # Both the greeting and the file content must appear in the output.
    output = await sandbox.terminal.run_command("python3 /workspace/test.py")
    for expected_fragment in ("Hello from Python!", "Hello from file!"):
        assert expected_fragment in output
@pytest.mark.asyncio
async def test_sandbox_file_persistence(sandbox):
    """Checks that several files, including a nested one, keep their content."""
    expected_by_path = {
        "file1.txt": "Content 1",
        "file2.txt": "Content 2",
        "nested/file3.txt": "Content 3",
    }

    # First write everything...
    for relative_path, text in expected_by_path.items():
        await sandbox.write_file(f"/workspace/{relative_path}", text)

    # ...then read everything back, so later writes cannot mask earlier ones.
    for relative_path, text in expected_by_path.items():
        read_back = await sandbox.read_file(f"/workspace/{relative_path}")
        assert read_back.strip() == text
@pytest.mark.asyncio
async def test_sandbox_python_environment(sandbox):
    """Tests Python environment configuration."""
    # Test Python version; the sandbox fixture in this module is built from
    # the python:3.12-slim image, so the reported version must be 3.12.
    result = await sandbox.terminal.run_command("python3 --version")
    assert "Python 3.12" in result

    # Test basic module imports
    python_code = """
import sys
import os
import json
print("Python is working!")
"""
    await sandbox.write_file("/workspace/env_test.py", python_code)
    result = await sandbox.terminal.run_command("python3 /workspace/env_test.py")
    assert "Python is working!" in result
@pytest.mark.asyncio
async def test_sandbox_network_access(sandbox):
    """Checks outbound HTTPS connectivity from inside the sandbox."""
    network_on = sandbox.config.network_enabled
    if not network_on:
        pytest.skip("Network access is disabled")

    run = sandbox.terminal.run_command
    # curl is installed first, then used to fetch only the response headers.
    await run("apt update && apt install curl -y")
    headers = await run("curl -I https://www.example.com")
    assert "HTTP/2 200" in headers
@pytest.mark.asyncio
async def test_sandbox_cleanup(sandbox_config):
    """Checks that cleanup removes the sandbox's container."""
    import docker

    doomed = DockerSandbox(sandbox_config)
    await doomed.create()

    # Give the sandbox some state, and remember its container before cleanup.
    await doomed.write_file("/workspace/test.txt", "test")
    container_id = doomed.terminal.container.id

    await doomed.cleanup()

    # The container must no longer be listed, even among stopped ones.
    remaining_ids = [c.id for c in docker.from_env().containers.list(all=True)]
    assert container_id not in remaining_ids
@pytest.mark.asyncio
async def test_sandbox_error_handling():
    """Checks that creating a sandbox from an invalid configuration raises."""
    bad_settings = SandboxSettings(image="nonexistent:latest", work_dir="/invalid")
    broken_sandbox = DockerSandbox(bad_settings)

    with pytest.raises(Exception):
        await broken_sandbox.create()
if __name__ == "__main__":
pytest.main(["-v", __file__])

View File

@ -0,0 +1,138 @@
import asyncio
import os
import tempfile
from typing import AsyncGenerator
import pytest
import pytest_asyncio
from app.sandbox.core.manager import SandboxManager
@pytest_asyncio.fixture(scope="function")
async def manager() -> AsyncGenerator[SandboxManager, None]:
    """Provide a fresh ``SandboxManager`` for a single test.

    The fixture is function-scoped, so every test case receives its own
    manager (limited to two sandboxes). Once the test finishes, whether it
    passed or failed, the manager's ``cleanup()`` is awaited.
    """
    sandbox_manager = SandboxManager(
        max_sandboxes=2,
        idle_timeout=60,
        cleanup_interval=30,
    )
    try:
        yield sandbox_manager
    finally:
        # Release everything the manager still holds.
        await sandbox_manager.cleanup()
@pytest.fixture
def temp_file():
    """Creates a temporary test file and yields its path.

    The file is created with ``delete=False`` and contains the text
    ``"test content"``. It is closed before the path is handed to the test,
    and removed again once the test is done.

    Yields:
        str: Path of the temporary file.
    """
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
        f.write("test content")
        path = f.name
    try:
        yield path
    finally:
        # EAFP instead of an exists() check followed by unlink(): the check
        # could race with a test that removes the file itself.
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Already removed; nothing left to clean up.
            pass
@pytest.mark.asyncio
async def test_create_sandbox(manager):
    """Check that a default sandbox can be created and used.

    After creation the new id must be tracked in both of the manager's
    bookkeeping mappings, and a simple ``echo`` run through the retrieved
    sandbox must return the echoed text.
    """
    new_id = await manager.create_sandbox()

    # The manager records the sandbox itself and its last-used entry.
    assert new_id in manager._sandboxes
    assert new_id in manager._last_used

    # The sandbox handed back by the manager can execute commands.
    created = await manager.get_sandbox(new_id)
    echoed = await created.run_command("echo 'test'")
    assert echoed.strip() == "test"
@pytest.mark.asyncio
async def test_max_sandboxes_limit(manager):
    """Check that the manager refuses to exceed ``max_sandboxes``.

    Sandboxes are created up to the limit, after which one more creation
    attempt must raise ``RuntimeError`` with the exact limit message.
    Every sandbox created here is deleted again afterwards; deletion
    failures are printed rather than raised.
    """
    tracked_ids = []
    try:
        # Fill the manager up to its configured capacity.
        for _ in range(manager.max_sandboxes):
            tracked_ids.append(await manager.create_sandbox())

        assert len(manager._sandboxes) == manager.max_sandboxes

        # One sandbox beyond the limit has to be rejected.
        with pytest.raises(RuntimeError) as raised:
            await manager.create_sandbox()

        limit_message = f"Maximum number of sandboxes ({manager.max_sandboxes}) reached"
        assert str(raised.value) == limit_message
    finally:
        # Best-effort removal of whatever was created above.
        for leftover_id in tracked_ids:
            try:
                await manager.delete_sandbox(leftover_id)
            except Exception as exc:
                print(f"Failed to cleanup sandbox {leftover_id}: {exc}")
@pytest.mark.asyncio
async def test_get_nonexistent_sandbox(manager):
    """Looking up an unknown sandbox id must raise ``KeyError``.

    The error text is expected to match ``Sandbox .* not found``.
    """
    unknown_id = "nonexistent-id"
    with pytest.raises(KeyError, match="Sandbox .* not found"):
        await manager.get_sandbox(unknown_id)
@pytest.mark.asyncio
async def test_sandbox_cleanup(manager):
    """Deleting a sandbox removes it from the manager's bookkeeping.

    The sandbox id must be present right after creation and absent from
    both ``_sandboxes`` and ``_last_used`` once ``delete_sandbox`` returns.
    """
    doomed_id = await manager.create_sandbox()
    assert doomed_id in manager._sandboxes

    await manager.delete_sandbox(doomed_id)

    assert doomed_id not in manager._sandboxes
    assert doomed_id not in manager._last_used
@pytest.mark.asyncio
async def test_idle_sandbox_cleanup(manager):
    """Idle sandboxes are dropped by the manager's idle sweep.

    The idle timeout is lowered to 0.1, a sandbox is created, the test
    sleeps for 0.2 seconds and then invokes the sweep directly; afterwards
    the sandbox must no longer be tracked.
    """
    # Shrink the timeout so the sandbox counts as idle almost immediately.
    manager.idle_timeout = 0.1

    idle_id = await manager.create_sandbox()
    assert idle_id in manager._sandboxes

    # Outlast the timeout, then run the sweep by hand instead of waiting
    # for it to be scheduled.
    await asyncio.sleep(0.2)
    await manager._cleanup_idle_sandboxes()

    assert idle_id not in manager._sandboxes
@pytest.mark.asyncio
async def test_manager_cleanup(manager):
    """``cleanup()`` on the manager releases every sandbox it tracks.

    Two sandboxes are created, the manager is cleaned up, and both of its
    bookkeeping mappings are then expected to be empty.
    """
    # The returned ids are not needed: only the manager's state is checked.
    for _ in range(2):
        await manager.create_sandbox()

    await manager.cleanup()

    assert not manager._sandboxes
    assert not manager._last_used
if __name__ == "__main__":
    # Allow running this test module directly. pytest.main() returns the
    # run's exit status; hand it back to the shell so that a failing run is
    # not reported as success (exit code 0).
    raise SystemExit(pytest.main(["-v", __file__]))