init project

2025-03-06 22:07:22 +08:00 · 2025-03-06 22:07:22 +08:00 · d028e64a98
commit d028e64a98
43 changed files with 3919 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,34 @@
+# HTML code is incorrectly calculated into statistics, so ignore them
+*.html linguist-detectable=false
+
+# Auto detect text files and perform LF normalization
+* text=auto eol=lf
+
+# Ensure shell scripts use LF (Linux style) line endings on Windows
+*.sh text eol=lf
+
+# Treat specific binary files as binary and prevent line ending conversion
+*.png binary
+*.jpg binary
+*.gif binary
+*.ico binary
+*.jpeg binary
+*.mp3 binary
+*.zip binary
+*.bin binary
+
+
+# Preserve original line endings for specific document files
+*.doc text eol=crlf
+*.docx text eol=crlf
+*.pdf binary
+
+# Ensure source code and script files use LF line endings
+*.py text eol=lf
+*.js text eol=lf
+*.html text eol=lf
+*.css text eol=lf
+
+# Specify custom diff driver for specific file types
+*.md diff=markdown
+*.json diff=json
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,180 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# PyPI configuration file
+.pypirc
+
+# Logs
+logs/
+
+# Data
+data/
+
+# Workspace
+workspace/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,39 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.0.1
+    hooks:
+      - id: autoflake
+        args: [
+          --remove-all-unused-imports,
+          --ignore-init-module-imports,  # 忽略 __init__.py 中的导入
+          --expand-star-imports,
+          --remove-duplicate-keys,
+          --remove-unused-variables,
+          --recursive,
+          --in-place,
+          --exclude=__init__.py,  # 排除 __init__.py 文件
+        ]
+        files: \.py$  # 只处理 Python 文件
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: [
+          "--profile", "black",  # 使用 black 兼容的配置
+          "--filter-files",
+          "--lines-after-imports=2",
+        ]
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 manna_and_poem
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
--- a/app/init.py
+++ b/app/init.py
--- a/app/agent/init.py
+++ b/app/agent/init.py
@ -0,0 +1,14 @@
+from app.agent.base import BaseAgent
+from app.agent.planning import PlanningAgent
+from app.agent.react import ReActAgent
+from app.agent.swe import SWEAgent
+from app.agent.toolcall import ToolCallAgent
+
+
+__all__ = [
+    "BaseAgent",
+    "PlanningAgent",
+    "ReActAgent",
+    "SWEAgent",
+    "ToolCallAgent",
+]
--- a/app/agent/base.py
+++ b/app/agent/base.py
@ -0,0 +1,191 @@
+from abc import ABC, abstractmethod
+from contextlib import asynccontextmanager
+from typing import List, Literal, Optional
+
+from pydantic import BaseModel, Field, model_validator
+
+from app.llm import LLM
+from app.logger import logger
+from app.schema import AgentState, Memory, Message
+
+
+class BaseAgent(BaseModel, ABC):
+    """Abstract base class for managing agent state and execution.
+
+    Provides foundational functionality for state transitions, memory management,
+    and a step-based execution loop. Subclasses must implement the `step` method.
+    """
+
+    # Core attributes
+    name: str = Field(..., description="Unique name of the agent")
+    description: Optional[str] = Field(None, description="Optional agent description")
+
+    # Prompts
+    system_prompt: Optional[str] = Field(
+        None, description="System-level instruction prompt"
+    )
+    next_step_prompt: Optional[str] = Field(
+        None, description="Prompt for determining next action"
+    )
+
+    # Dependencies
+    llm: LLM = Field(default_factory=LLM, description="Language model instance")
+    memory: Memory = Field(default_factory=Memory, description="Agent's memory store")
+    state: AgentState = Field(
+        default=AgentState.IDLE, description="Current agent state"
+    )
+
+    # Execution control
+    max_steps: int = Field(default=10, description="Maximum steps before termination")
+    current_step: int = Field(default=0, description="Current step in execution")
+
+    duplicate_threshold: int = 2
+
+    class Config:
+        arbitrary_types_allowed = True
+        extra = "allow"  # Allow extra fields for flexibility in subclasses
+
+    @model_validator(mode="after")
+    def initialize_agent(self) -> "BaseAgent":
+        """Initialize agent with default settings if not provided."""
+        if self.llm is None or not isinstance(self.llm, LLM):
+            self.llm = LLM(config_name=self.name.lower())
+        if not isinstance(self.memory, Memory):
+            self.memory = Memory()
+        return self
+
+    @asynccontextmanager
+    async def state_context(self, new_state: AgentState):
+        """Context manager for safe agent state transitions.
+
+        Args:
+            new_state: The state to transition to during the context.
+
+        Yields:
+            None: Allows execution within the new state.
+
+        Raises:
+            ValueError: If the new_state is invalid.
+        """
+        if not isinstance(new_state, AgentState):
+            raise ValueError(f"Invalid state: {new_state}")
+
+        previous_state = self.state
+        self.state = new_state
+        try:
+            yield
+        except Exception as e:
+            self.state = AgentState.ERROR  # Transition to ERROR on failure
+            raise e
+        finally:
+            self.state = previous_state  # Revert to previous state
+
+    def update_memory(
+        self,
+        role: Literal["user", "system", "assistant", "tool"],
+        content: str,
+        **kwargs,
+    ) -> None:
+        """Add a message to the agent's memory.
+
+        Args:
+            role: The role of the message sender (user, system, assistant, tool).
+            content: The message content.
+            **kwargs: Additional arguments (e.g., tool_call_id for tool messages).
+
+        Raises:
+            ValueError: If the role is unsupported.
+        """
+        message_map = {
+            "user": Message.user_message,
+            "system": Message.system_message,
+            "assistant": Message.assistant_message,
+            "tool": lambda content, **kw: Message.tool_message(content, **kw),
+        }
+
+        if role not in message_map:
+            raise ValueError(f"Unsupported message role: {role}")
+
+        msg_factory = message_map[role]
+        msg = msg_factory(content, **kwargs) if role == "tool" else msg_factory(content)
+        self.memory.add_message(msg)
+
+    async def run(self, request: Optional[str] = None) -> str:
+        """Execute the agent's main loop asynchronously.
+
+        Args:
+            request: Optional initial user request to process.
+
+        Returns:
+            A string summarizing the execution results.
+
+        Raises:
+            RuntimeError: If the agent is not in IDLE state at start.
+        """
+        if self.state != AgentState.IDLE:
+            raise RuntimeError(f"Cannot run agent from state: {self.state}")
+
+        if request:
+            self.update_memory("user", request)
+
+        results: List[str] = []
+        async with self.state_context(AgentState.RUNNING):
+            while (
+                self.current_step < self.max_steps and self.state != AgentState.FINISHED
+            ):
+                self.current_step += 1
+                logger.info(f"Executing step {self.current_step}/{self.max_steps}")
+                step_result = await self.step()
+
+                # Check for stuck state
+                if self.is_stuck():
+                    self.handle_stuck_state()
+
+                results.append(f"Step {self.current_step}: {step_result}")
+
+            if self.current_step >= self.max_steps:
+                results.append(f"Terminated: Reached max steps ({self.max_steps})")
+
+        return "\n".join(results) if results else "No steps executed"
+
+    @abstractmethod
+    async def step(self) -> str:
+        """Execute a single step in the agent's workflow.
+
+        Must be implemented by subclasses to define specific behavior.
+        """
+
+    def handle_stuck_state(self):
+        """Handle stuck state by adding a prompt to change strategy"""
+        stuck_prompt = "\
+        Observed duplicate responses. Consider new strategies and avoid repeating ineffective paths already attempted."
+        self.next_step_prompt = f"{stuck_prompt}\n{self.next_step_prompt}"
+        logger.warning(f"Agent detected stuck state. Added prompt: {stuck_prompt}")
+
+    def is_stuck(self) -> bool:
+        """Check if the agent is stuck in a loop by detecting duplicate content"""
+        if len(self.memory.messages) < 2:
+            return False
+
+        last_message = self.memory.messages[-1]
+        if not last_message.content:
+            return False
+
+        # Count identical content occurrences
+        duplicate_count = sum(
+            1
+            for msg in reversed(self.memory.messages[:-1])
+            if msg.role == "assistant" and msg.content == last_message.content
+        )
+
+        return duplicate_count >= self.duplicate_threshold
+
+    @property
+    def messages(self) -> List[Message]:
+        """Retrieve a list of messages from the agent's memory."""
+        return self.memory.messages
+
+    @messages.setter
+    def messages(self, value: List[Message]):
+        """Set the list of messages in the agent's memory."""
+        self.memory.messages = value
--- a/app/agent/planning.py
+++ b/app/agent/planning.py
@ -0,0 +1,285 @@
+import time
+from typing import Dict, List, Literal, Optional
+
+from pydantic import Field, model_validator
+
+from app.agent.toolcall import ToolCallAgent
+from app.logger import logger
+from app.schema import Message, ToolCall
+from app.tool import PlanningTool, Terminate, ToolCollection
+
+
+PLANNING_SYSTEM_PROMPT = """
+You are an expert Planning Agent tasked with solving complex problems by creating and managing structured plans.
+Your job is:
+1. Analyze requests to understand the task scope
+2. Create clear, actionable plans with the `planning` tool
+3. Execute steps using available tools as needed
+4. Track progress and adapt plans dynamically
+5. Use `finish` to conclude when the task is complete
+
+Available tools will vary by task but may include:
+- `planning`: Create, update, and track plans (commands: create, update, mark_step, etc.)
+- `finish`: End the task when complete
+
+Break tasks into logical, sequential steps. Think about dependencies and verification methods.
+"""
+
+NEXT_STEP_PROMPT = """
+Based on the current state, what's your next step?
+Consider:
+1. Do you need to create or refine a plan?
+2. Are you ready to execute a specific step?
+3. Have you completed the task?
+
+Provide reasoning, then select the appropriate tool or action.
+"""
+
+
+class PlanningAgent(ToolCallAgent):
+    """
+    An agent that creates and manages plans to solve tasks.
+
+    This agent uses a planning tool to create and manage structured plans,
+    and tracks progress through individual steps until task completion.
+    """
+
+    name: str = "planning"
+    description: str = "An agent that creates and manages plans to solve tasks"
+
+    system_prompt: str = PLANNING_SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_PROMPT
+
+    available_tools: ToolCollection = Field(
+        default_factory=lambda: ToolCollection(PlanningTool(), Terminate())
+    )
+    tool_choices: Literal["none", "auto", "required"] = "auto"
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+
+    tool_calls: List[ToolCall] = Field(default_factory=list)
+    active_plan_id: Optional[str] = Field(default=None)
+
+    # Add a dictionary to track the step status for each tool call
+    step_execution_tracker: Dict[str, Dict] = Field(default_factory=dict)
+    current_step_index: Optional[int] = None
+
+    max_steps: int = 20
+
+    @model_validator(mode="after")
+    def initialize_plan_and_verify_tools(self) -> "PlanningAgent":
+        """Initialize the agent with a default plan ID and validate required tools."""
+        self.active_plan_id = f"plan_{int(time.time())}"
+
+        if "planning" not in self.available_tools.tool_map:
+            self.available_tools.add_tool(PlanningTool())
+
+        return self
+
+    async def think(self) -> bool:
+        """Decide the next action based on plan status."""
+        prompt = (
+            f"CURRENT PLAN STATUS:\n{await self.get_plan()}\n\n{self.next_step_prompt}"
+            if self.active_plan_id
+            else self.next_step_prompt
+        )
+        self.messages.append(Message.user_message(prompt))
+
+        # Get the current step index before thinking
+        self.current_step_index = await self._get_current_step_index()
+
+        result = await super().think()
+
+        # After thinking, if we decided to execute a tool and it's not a planning tool or special tool,
+        # associate it with the current step for tracking
+        if result and self.tool_calls:
+            latest_tool_call = self.tool_calls[0]  # Get the most recent tool call
+            if (
+                latest_tool_call.function.name != "planning"
+                and latest_tool_call.function.name not in self.special_tool_names
+                and self.current_step_index is not None
+            ):
+                self.step_execution_tracker[latest_tool_call.id] = {
+                    "step_index": self.current_step_index,
+                    "tool_name": latest_tool_call.function.name,
+                    "status": "pending",  # Will be updated after execution
+                }
+
+        return result
+
+    async def act(self) -> str:
+        """Execute a step and track its completion status."""
+        result = await super().act()
+
+        # After executing the tool, update the plan status
+        if self.tool_calls:
+            latest_tool_call = self.tool_calls[0]
+
+            # Update the execution status to completed
+            if latest_tool_call.id in self.step_execution_tracker:
+                self.step_execution_tracker[latest_tool_call.id]["status"] = "completed"
+                self.step_execution_tracker[latest_tool_call.id]["result"] = result
+
+                # Update the plan status if this was a non-planning, non-special tool
+                if (
+                    latest_tool_call.function.name != "planning"
+                    and latest_tool_call.function.name not in self.special_tool_names
+                ):
+                    await self.update_plan_status(latest_tool_call.id)
+
+        return result
+
+    async def get_plan(self) -> str:
+        """Retrieve the current plan status."""
+        if not self.active_plan_id:
+            return "No active plan. Please create a plan first."
+
+        result = await self.available_tools.execute(
+            name="planning",
+            tool_input={"command": "get", "plan_id": self.active_plan_id},
+        )
+        return result.output if hasattr(result, "output") else str(result)
+
+    async def run(self, request: Optional[str] = None) -> str:
+        """Run the agent with an optional initial request."""
+        if request:
+            await self.create_initial_plan(request)
+        return await super().run()
+
+    async def update_plan_status(self, tool_call_id: str) -> None:
+        """
+        Update the current plan progress based on completed tool execution.
+        Only marks a step as completed if the associated tool has been successfully executed.
+        """
+        if not self.active_plan_id:
+            return
+
+        if tool_call_id not in self.step_execution_tracker:
+            logger.warning(f"No step tracking found for tool call {tool_call_id}")
+            return
+
+        tracker = self.step_execution_tracker[tool_call_id]
+        if tracker["status"] != "completed":
+            logger.warning(f"Tool call {tool_call_id} has not completed successfully")
+            return
+
+        step_index = tracker["step_index"]
+
+        try:
+            # Mark the step as completed
+            await self.available_tools.execute(
+                name="planning",
+                tool_input={
+                    "command": "mark_step",
+                    "plan_id": self.active_plan_id,
+                    "step_index": step_index,
+                    "step_status": "completed",
+                },
+            )
+            logger.info(
+                f"Marked step {step_index} as completed in plan {self.active_plan_id}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to update plan status: {e}")
+
+    async def _get_current_step_index(self) -> Optional[int]:
+        """
+        Parse the current plan to identify the first non-completed step's index.
+        Returns None if no active step is found.
+        """
+        if not self.active_plan_id:
+            return None
+
+        plan = await self.get_plan()
+
+        try:
+            plan_lines = plan.splitlines()
+            steps_index = -1
+
+            # Find the index of the "Steps:" line
+            for i, line in enumerate(plan_lines):
+                if line.strip() == "Steps:":
+                    steps_index = i
+                    break
+
+            if steps_index == -1:
+                return None
+
+            # Find the first non-completed step
+            for i, line in enumerate(plan_lines[steps_index + 1 :], start=0):
+                if "[ ]" in line or "[→]" in line:  # not_started or in_progress
+                    # Mark current step as in_progress
+                    await self.available_tools.execute(
+                        name="planning",
+                        tool_input={
+                            "command": "mark_step",
+                            "plan_id": self.active_plan_id,
+                            "step_index": i,
+                            "step_status": "in_progress",
+                        },
+                    )
+                    return i
+
+            return None  # No active step found
+        except Exception as e:
+            logger.warning(f"Error finding current step index: {e}")
+            return None
+
+    async def create_initial_plan(self, request: str) -> None:
+        """Create an initial plan based on the request."""
+        logger.info(f"Creating initial plan with ID: {self.active_plan_id}")
+
+        messages = [
+            Message.user_message(
+                f"Analyze the request and create a plan with ID {self.active_plan_id}: {request}"
+            )
+        ]
+        self.memory.add_messages(messages)
+        response = await self.llm.ask_tool(
+            messages=messages,
+            system_msgs=[Message.system_message(self.system_prompt)],
+            tools=self.available_tools.to_params(),
+            tool_choice="required",
+        )
+        assistant_msg = Message.from_tool_calls(
+            content=response.content, tool_calls=response.tool_calls
+        )
+
+        self.memory.add_message(assistant_msg)
+
+        plan_created = False
+        for tool_call in response.tool_calls:
+            if tool_call.function.name == "planning":
+                result = await self.execute_tool(tool_call)
+                logger.info(
+                    f"Executed tool {tool_call.function.name} with result: {result}"
+                )
+
+                # Add tool response to memory
+                tool_msg = Message.tool_message(
+                    content=result,
+                    tool_call_id=tool_call.id,
+                    name=tool_call.function.name,
+                )
+                self.memory.add_message(tool_msg)
+                plan_created = True
+                break
+
+        if not plan_created:
+            logger.warning("No plan created from initial request")
+            tool_msg = Message.assistant_message(
+                "Error: Parameter `plan_id` is required for command: create"
+            )
+            self.memory.add_message(tool_msg)
+
+
+async def main():
+    # Configure and run the agent
+    agent = PlanningAgent(available_tools=ToolCollection(PlanningTool(), Terminate()))
+    result = await agent.run("Help me plan a trip to the moon")
+    print(result)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
--- a/app/agent/react.py
+++ b/app/agent/react.py
@ -0,0 +1,38 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from pydantic import Field
+
+from app.agent.base import BaseAgent
+from app.llm import LLM
+from app.schema import AgentState, Memory
+
+
+class ReActAgent(BaseAgent, ABC):
+    name: str
+    description: Optional[str] = None
+
+    system_prompt: Optional[str] = None
+    next_step_prompt: Optional[str] = None
+
+    llm: Optional[LLM] = Field(default_factory=LLM)
+    memory: Memory = Field(default_factory=Memory)
+    state: AgentState = AgentState.IDLE
+
+    max_steps: int = 10
+    current_step: int = 0
+
+    @abstractmethod
+    async def think(self) -> bool:
+        """Process current state and decide next action"""
+
+    @abstractmethod
+    async def act(self) -> str:
+        """Execute decided actions"""
+
+    async def step(self) -> str:
+        """Execute a single step: think and act."""
+        should_act = await self.think()
+        if not should_act:
+            return "Thinking complete - no action needed"
+        return await self.act()
--- a/app/agent/swe.py
+++ b/app/agent/swe.py
@ -0,0 +1,37 @@
+from typing import List
+
+from pydantic import Field
+
+from app.agent.toolcall import ToolCallAgent
+from app.prompt.swe import NEXT_STEP_TEMPLATE, SYSTEM_PROMPT
+from app.tool import Bash, StrReplaceEditor, Terminate, ToolCollection
+
+
+class SWEAgent(ToolCallAgent):
+    """An agent that implements the SWEAgent paradigm for executing code and natural conversations."""
+
+    name: str = "swe"
+    description: str = "an autonomous AI programmer that interacts directly with the computer to solve tasks."
+
+    system_prompt: str = SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_TEMPLATE
+
+    available_tools: ToolCollection = ToolCollection(
+        Bash(), StrReplaceEditor(), Terminate()
+    )
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+
+    max_steps: int = 30
+
+    bash: Bash = Field(default_factory=Bash)
+    working_dir: str = "."
+
+    async def think(self) -> bool:
+        """Process current state and decide next action"""
+        # Update working directory
+        self.working_dir = await self.bash.execute("pwd")
+        self.next_step_prompt = self.next_step_prompt.format(
+            current_dir=self.working_dir
+        )
+
+        return await super().think()
--- a/app/agent/toolcall.py
+++ b/app/agent/toolcall.py
@ -0,0 +1,171 @@
+import json
+from typing import Any, List, Literal
+
+from pydantic import Field
+
+from app.agent.react import ReActAgent
+from app.logger import logger
+from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+from app.schema import AgentState, Message, ToolCall
+from app.tool import CreateChatCompletion, Terminate, ToolCollection
+
+
+TOOL_CALL_REQUIRED = "Tool calls required but none provided"
+
+
+class ToolCallAgent(ReActAgent):
+    """Base agent class for handling tool/function calls with enhanced abstraction"""
+
+    name: str = "toolcall"
+    description: str = "an agent that can execute tool calls."
+
+    system_prompt: str = SYSTEM_PROMPT
+    next_step_prompt: str = NEXT_STEP_PROMPT
+
+    available_tools: ToolCollection = ToolCollection(
+        CreateChatCompletion(), Terminate()
+    )
+    tool_choices: Literal["none", "auto", "required"] = "auto"
+    special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+
+    tool_calls: List[ToolCall] = Field(default_factory=list)
+
+    max_steps: int = 30
+
+    async def think(self) -> bool:
+        """Process current state and decide next actions using tools"""
+        if self.next_step_prompt:
+            user_msg = Message.user_message(self.next_step_prompt)
+            self.messages += [user_msg]
+
+        # Get response with tool options
+        response = await self.llm.ask_tool(
+            messages=self.messages,
+            system_msgs=[Message.system_message(self.system_prompt)]
+            if self.system_prompt
+            else None,
+            tools=self.available_tools.to_params(),
+            tool_choice=self.tool_choices,
+        )
+        self.tool_calls = response.tool_calls
+
+        # Log response info
+        logger.info(f"Tool content: {response.content}")
+        logger.info(
+            f"Tool calls count: {len(response.tool_calls) if response.tool_calls else 0}"
+        )
+        logger.info(f"Tool calls: {response.tool_calls}")
+
+        try:
+            # Handle different tool_choices modes
+            if self.tool_choices == "none":
+                if response.tool_calls:
+                    logger.warning("Tool calls provided when tool_choice is 'none'")
+                if response.content:
+                    self.memory.add_message(Message.assistant_message(response.content))
+                    return True
+                return False
+
+            # Create and add assistant message
+            assistant_msg = (
+                Message.from_tool_calls(
+                    content=response.content, tool_calls=self.tool_calls
+                )
+                if self.tool_calls
+                else Message.assistant_message(response.content)
+            )
+            self.memory.add_message(assistant_msg)
+
+            if self.tool_choices == "required" and not self.tool_calls:
+                return True  # Will be handled in act()
+
+            # For 'auto' mode, continue with content if no commands but content exists
+            if self.tool_choices == "auto" and not self.tool_calls:
+                return bool(response.content)
+
+            return bool(self.tool_calls)
+        except Exception as e:
+            logger.error(f"Error in thinking phase: {e}")
+            self.memory.add_message(
+                Message.assistant_message(
+                    f"Error encountered while processing: {str(e)}"
+                )
+            )
+            return False
+
+    async def act(self) -> str:
+        """Execute tool calls and handle their results"""
+        if not self.tool_calls:
+            if self.tool_choices == "required":
+                raise ValueError(TOOL_CALL_REQUIRED)
+
+            # Return last message content if no tool calls
+            return self.messages[-1].content or "No content or commands to execute"
+
+        results = []
+        for command in self.tool_calls:
+            result = await self.execute_tool(command)
+            logger.info(f"Executed tool {command.function.name} with result: {result}")
+
+            # Add tool response to memory
+            tool_msg = Message.tool_message(
+                content=result, tool_call_id=command.id, name=command.function.name
+            )
+            self.memory.add_message(tool_msg)
+            results.append(result)
+
+        return "\n\n".join(results)
+
+    async def execute_tool(self, command: ToolCall) -> str:
+        """Execute a single tool call with robust error handling"""
+        if not command or not command.function or not command.function.name:
+            return "Error: Invalid command format"
+
+        name = command.function.name
+        if name not in self.available_tools.tool_map:
+            return f"Error: Unknown tool '{name}'"
+
+        try:
+            # Parse arguments
+            args = json.loads(command.function.arguments or "{}")
+
+            # Execute the tool
+            result = await self.available_tools.execute(name=name, tool_input=args)
+
+            # Format result for display
+            observation = (
+                f"Observed output of cmd `{name}` executed:\n{str(result)}"
+                if result
+                else f"Cmd `{name}` completed with no output"
+            )
+
+            # Handle special tools like `finish`
+            await self._handle_special_tool(name=name, result=result)
+
+            return observation
+        except json.JSONDecodeError:
+            error_msg = f"Error parsing arguments for {name}: Invalid JSON format"
+            logger.error(error_msg)
+            return f"Error: {error_msg}"
+        except Exception as e:
+            error_msg = f"Error executing tool {name}: {str(e)}"
+            logger.error(error_msg)
+            return f"Error: {error_msg}"
+
+    async def _handle_special_tool(self, name: str, result: Any, **kwargs):
+        """Handle special tool execution and state changes"""
+        if not self._is_special_tool(name):
+            return
+
+        if self._should_finish_execution(name=name, result=result, **kwargs):
+            # Set agent state to finished
+            self.state = AgentState.FINISHED
+
+    @staticmethod
+    def _should_finish_execution(**kwargs) -> bool:
+        """Determine if tool execution should finish the agent"""
+        return True
+
+    def _is_special_tool(self, name: str) -> bool:
+        """Check if tool name is in special tools list"""
+        return name.lower() in [n.lower() for n in self.special_tool_names]
--- a/app/config.py
+++ b/app/config.py
@ -0,0 +1,112 @@
+import threading
+import tomllib
+from pathlib import Path
+from typing import Dict, Optional
+
+from pydantic import BaseModel, Field
+
+
+def get_project_root() -> Path:
+    """Get the project root directory"""
+    return Path(__file__).resolve().parent.parent
+
+
+PROJECT_ROOT = get_project_root()
+WORKSPACE_ROOT = PROJECT_ROOT / "workspace"
+
+
+class LLMSettings(BaseModel):
+    model: str = Field(..., description="Model name")
+    base_url: str = Field(..., description="API base URL")
+    api_key: str = Field(..., description="API key")
+    max_tokens: int = Field(4096, description="Maximum number of tokens per request")
+    temperature: float = Field(1.0, description="Sampling temperature")
+
+
+class ScreenshotSettings(BaseModel):
+    api_key: Optional[str] = Field(None, description="Screenshot API key")
+    base_url: Optional[str] = Field(None, description="Screenshot service URL")
+
+
+class AppConfig(BaseModel):
+    llm: Dict[str, LLMSettings]
+    screenshot: Optional[ScreenshotSettings] = None
+
+
+class Config:
+    _instance = None
+    _lock = threading.Lock()
+    _initialized = False
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        if not self._initialized:
+            with self._lock:
+                if not self._initialized:
+                    self._config = None
+                    self._load_initial_config()
+                    self._initialized = True
+
+    @staticmethod
+    def _get_config_path() -> Path:
+        root = PROJECT_ROOT
+        config_path = root / "config" / "config.toml"
+        if config_path.exists():
+            return config_path
+        example_path = root / "config" / "config.example.toml"
+        if example_path.exists():
+            return example_path
+        raise FileNotFoundError("No configuration file found in config directory")
+
+    def _load_config(self) -> dict:
+        config_path = self._get_config_path()
+        with config_path.open("rb") as f:
+            return tomllib.load(f)
+
+    def _load_initial_config(self):
+        raw_config = self._load_config()
+        base_llm = raw_config.get("llm", {})
+        llm_overrides = {
+            k: v for k, v in raw_config.get("llm", {}).items() if isinstance(v, dict)
+        }
+
+        default_settings = {
+            "model": base_llm.get("model"),
+            "base_url": base_llm.get("base_url"),
+            "api_key": base_llm.get("api_key"),
+            "max_tokens": base_llm.get("max_tokens", 4096),
+            "temperature": base_llm.get("temperature", 1.0),
+        }
+
+        config_dict = {
+            "llm": {
+                "default": default_settings,
+                **{
+                    name: {**default_settings, **override_config}
+                    for name, override_config in llm_overrides.items()
+                },
+            }
+        }
+
+        # Add screenshot config if present
+        if screenshot_config := raw_config.get("screenshot"):
+            config_dict["screenshot"] = screenshot_config
+
+        self._config = AppConfig(**config_dict)
+
+    @property
+    def screenshot(self) -> Optional[ScreenshotSettings]:
+        return self._config.screenshot
+
+    @property
+    def llm(self) -> Dict[str, LLMSettings]:
+        return self._config.llm
+
+
+config = Config()
--- a/app/exceptions.py
+++ b/app/exceptions.py
@ -0,0 +1,12 @@
+class ToolError(Exception):
+    """Raised when a tool encounters an error."""
+
+    def __init__(self, message):
+        self.message = message
+
+
+class BrowserException(Exception):
+    """Base exception for browser-related errors."""
+
+    def __init__(self, message):
+        super().__init__(message)
--- a/app/flow/init.py
+++ b/app/flow/init.py
--- a/app/flow/base.py
+++ b/app/flow/base.py
@ -0,0 +1,60 @@
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Dict, List, Optional, Union
+
+from app.agent.base import BaseAgent
+from app.agent.toolcall import ToolCallAgent
+
+
+class FlowType(str, Enum):
+    PLANNING = "planning"
+
+
+class BaseFlow(ABC):
+    """Base class for execution flows supporting multiple agents"""
+
+    def __init__(
+        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **kwargs
+    ):
+        # Handle different ways of providing agents
+        if isinstance(agents, BaseAgent):
+            self.agents = {"default": agents}
+        elif isinstance(agents, list):
+            self.agents = {f"agent_{i}": agent for i, agent in enumerate(agents)}
+        else:
+            self.agents = agents
+
+        self.tools = kwargs.get("tools")
+        self.primary_agent_key = kwargs.get("primary_agent", None)
+
+        # If primary agent not specified, use first agent
+        if not self.primary_agent_key and self.agents:
+            self.primary_agent_key = next(iter(self.agents))
+
+        self._setup_agents()
+
+    def _setup_agents(self):
+        """Configure all agents with tools and initial setup"""
+        if self.tools:
+            for agent_key, agent in self.agents.items():
+                if isinstance(agent, ToolCallAgent):
+                    agent.available_tools = self.tools
+
+    @property
+    def primary_agent(self) -> Optional[BaseAgent]:
+        """Get the primary agent for the flow"""
+        return self.agents.get(self.primary_agent_key)
+
+    def get_agent(self, key: str) -> Optional[BaseAgent]:
+        """Get a specific agent by key"""
+        return self.agents.get(key)
+
+    def add_agent(self, key: str, agent: BaseAgent) -> None:
+        """Add a new agent to the flow"""
+        self.agents[key] = agent
+        if isinstance(agent, ToolCallAgent) and self.tools:
+            agent.available_tools = self.tools
+
+    @abstractmethod
+    async def execute(self, input_text: str) -> str:
+        """Execute the flow with given input"""
--- a/app/flow/flow_factory.py
+++ b/app/flow/flow_factory.py
@ -0,0 +1,25 @@
+from typing import Dict, List, Union
+
+from app.agent.base import BaseAgent
+from app.flow.base import BaseFlow, FlowType
+from app.flow.planning import PlanningFlow
+
+
+class FlowFactory:
+    """Factory for creating different types of flows with support for multiple agents"""
+
+    @staticmethod
+    def create_flow(
+        flow_type: FlowType,
+        agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]],
+        **kwargs,
+    ) -> BaseFlow:
+        flows = {
+            FlowType.PLANNING: PlanningFlow,
+        }
+
+        flow_class = flows.get(flow_type)
+        if not flow_class:
+            raise ValueError(f"Unknown flow type: {flow_type}")
+
+        return flow_class(agents, **kwargs)
--- a/app/flow/planning.py
+++ b/app/flow/planning.py
@ -0,0 +1,402 @@
+import time
+from typing import Dict, List, Optional, Union
+
+from app.agent.base import BaseAgent
+from app.flow.base import BaseFlow
+from app.logger import logger
+from app.schema import AgentState
+from app.tool import PlanningTool, ToolCollection
+
+
+class PlanningFlow(BaseFlow):
+    """A flow that manages planning and execution of tasks using agents."""
+
+    def __init__(
+        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **kwargs
+    ):
+        # Initialize planning tool first
+        self.planning_tool = self._initialize_planning_tool(kwargs.get("tools"))
+
+        # If tools were provided, ensure planning tool is included
+        tools = kwargs.get("tools")
+        if tools:
+            planning_tool_exists = any(
+                isinstance(tool, PlanningTool) for tool in tools.tools
+            )
+            if not planning_tool_exists:
+                tools.add_tool(self.planning_tool)
+        else:
+            # Create a new tool collection with at least the planning tool
+            tools = ToolCollection(self.planning_tool)
+            kwargs["tools"] = tools
+
+        super().__init__(agents, **kwargs)
+
+        # Define agent roles
+        self.planner_key = kwargs.get("planner", self.primary_agent_key)
+        self.executor_keys = kwargs.get("executors", list(self.agents.keys()))
+
+        # Planning state tracking
+        self.active_plan_id = kwargs.get("plan_id", f"plan_{int(time.time())}")
+        self.current_step_index = None
+
+        # Ensure the planning tool has been initialized properly
+        if not hasattr(self.planning_tool, "_plans"):
+            self.planning_tool._plans = {}
+
+    def _initialize_planning_tool(
+        self, tools: Optional[ToolCollection]
+    ) -> PlanningTool:
+        """Initialize planning tool, reusing existing one if available"""
+        if tools:
+            for tool in tools.tools:
+                if isinstance(tool, PlanningTool):
+                    return tool
+        return PlanningTool()
+
+    @property
+    def planner(self) -> Optional[BaseAgent]:
+        """Get the planning agent"""
+        return (
+            self.agents.get(self.planner_key)
+            if self.planner_key
+            else self.primary_agent
+        )
+
+    def get_executor(self, step_type: Optional[str] = None) -> BaseAgent:
+        """
+        Get an appropriate executor agent for the current step.
+        Can be extended to select agents based on step type/requirements.
+        """
+        # If step type is provided and matches an agent key, use that agent
+        if step_type and step_type in self.agents:
+            return self.agents[step_type]
+
+        # Otherwise use the first available executor or fall back to primary agent
+        for key in self.executor_keys:
+            if key in self.agents:
+                return self.agents[key]
+
+        # Fallback to primary agent
+        return self.primary_agent
+
+    async def execute(self, input_text: str) -> str:
+        """Execute the planning flow with agents."""
+        try:
+            if not self.primary_agent:
+                raise ValueError("No primary agent available")
+
+            # Create initial plan if input provided
+            if input_text:
+                await self._create_initial_plan(input_text)
+
+                # Verify plan was created successfully
+                if self.active_plan_id not in self.planning_tool._plans:
+                    logger.error(
+                        f"Plan creation failed. Plan ID {self.active_plan_id} not found in planning tool."
+                    )
+                    return f"Failed to create plan for: {input_text}"
+
+            result = ""
+            while True:
+                # Get current step to execute
+                self.current_step_index, step_info = await self._get_current_step_info()
+
+                # Exit if no more steps or plan completed
+                if self.current_step_index is None:
+                    result += await self._finalize_plan()
+                    break
+
+                # Execute current step with appropriate agent
+                step_type = step_info.get("type") if step_info else None
+                executor = self.get_executor(step_type)
+                step_result = await self._execute_step(executor, step_info)
+                result += step_result + "\n"
+
+                # Check if agent wants to terminate
+                if hasattr(executor, "state") and executor.state == AgentState.FINISHED:
+                    break
+
+            return result
+        except Exception as e:
+            logger.error(f"Error in PlanningFlow: {str(e)}")
+            return f"Execution failed: {str(e)}"
+
+    async def _create_initial_plan(self, request: str) -> None:
+        """Create an initial plan based on the request using an appropriate agent."""
+        logger.info(f"Creating initial plan with ID: {self.active_plan_id}")
+
+        agent = self.planner if self.planner else self.primary_agent
+
+        # First, directly create an empty plan to ensure the plan ID exists
+        self.planning_tool._plans[self.active_plan_id] = {
+            "title": f"Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
+            "description": f"Auto-generated plan for request: {request}",
+            "steps": [],
+            "step_status": {},
+            "created_at": time.time(),
+            "updated_at": time.time(),
+        }
+
+        # Use agent.run to create the plan
+        plan_prompt = f"""
+        I need you to create a detailed plan to accomplish this task:
+
+        {request}
+
+        Please create a plan with ID {self.active_plan_id} using the planning tool.
+        The plan should include all necessary steps to complete the task.
+        """
+
+        try:
+            plan_result = await agent.run(plan_prompt)
+            logger.info(f"Plan creation result: {plan_result[:200]}...")
+
+            # Verify the plan was created
+            if (
+                self.active_plan_id not in self.planning_tool._plans
+                or not self.planning_tool._plans[self.active_plan_id].get("steps")
+            ):
+                logger.warning(
+                    "Plan may not have been created properly. Creating default plan."
+                )
+                await self._create_default_plan(request)
+        except Exception as e:
+            logger.error(f"Error creating plan: {e}")
+            await self._create_default_plan(request)
+
+    async def _create_default_plan(self, request: str) -> None:
+        """Create a default plan if the agent fails to create one."""
+        try:
+            # Try using the planning tool directly
+            await self.planning_tool.execute(
+                command="create",
+                plan_id=self.active_plan_id,
+                title=f"Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
+                description=f"Auto-generated plan for request: {request}",
+                steps=["Analyze request", "Execute task", "Verify results"],
+            )
+        except Exception as e:
+            logger.error(f"Failed to create default plan with planning tool: {e}")
+            # Create plan directly in the planning tool's storage
+            self.planning_tool._plans[self.active_plan_id] = {
+                "title": f"Emergency Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
+                "description": f"Emergency auto-generated plan for request: {request}",
+                "steps": ["Analyze request", "Execute task", "Verify results"],
+                "step_status": {
+                    "0": "not_started",
+                    "1": "not_started",
+                    "2": "not_started",
+                },
+                "created_at": time.time(),
+                "updated_at": time.time(),
+            }
+
+        logger.info(f"Created default plan with ID: {self.active_plan_id}")
+
+    async def _get_current_step_info(self) -> tuple[Optional[int], Optional[dict]]:
+        """
+        Parse the current plan to identify the first non-completed step's index and info.
+        Returns (None, None) if no active step is found.
+        """
+        if (
+            not self.active_plan_id
+            or self.active_plan_id not in self.planning_tool._plans
+        ):
+            logger.error(f"Plan with ID {self.active_plan_id} not found")
+            return None, None
+
+        try:
+            # Direct access to step status from planning tool storage
+            plan_data = self.planning_tool._plans[self.active_plan_id]
+            steps = plan_data.get("steps", [])
+            step_status = plan_data.get("step_status", {})
+
+            # Find first non-completed step
+            for i, step in enumerate(steps):
+                status = step_status.get(str(i), "not_started")
+                if status in ["not_started", "in_progress"]:
+                    # Extract step type/category if available
+                    step_info = {"text": step}
+
+                    # Try to extract step type from the text (e.g., [SEARCH] or [CODE])
+                    import re
+
+                    type_match = re.search(r"\[([A-Z_]+)\]", step)
+                    if type_match:
+                        step_info["type"] = type_match.group(1).lower()
+
+                    # Mark current step as in_progress
+                    try:
+                        await self.planning_tool.execute(
+                            command="mark_step",
+                            plan_id=self.active_plan_id,
+                            step_index=i,
+                            step_status="in_progress",
+                        )
+                    except Exception as e:
+                        logger.warning(f"Error marking step as in_progress: {e}")
+                        # Update step status directly
+                        step_status[str(i)] = "in_progress"
+                        plan_data["step_status"] = step_status
+                        plan_data["updated_at"] = time.time()
+
+                    return i, step_info
+
+            return None, None  # No active step found
+
+        except Exception as e:
+            logger.warning(f"Error finding current step index: {e}")
+            return None, None
+
+    async def _execute_step(self, executor: BaseAgent, step_info: dict) -> str:
+        """Execute the current step with the specified agent using agent.run()."""
+        # Prepare context for the agent with current plan status
+        plan_status = await self._get_plan_text()
+        step_text = step_info.get("text", f"Step {self.current_step_index}")
+
+        # Create a prompt for the agent to execute the current step
+        step_prompt = f"""
+        CURRENT PLAN STATUS:
+        {plan_status}
+
+        YOUR CURRENT TASK:
+        You are now working on step {self.current_step_index}: "{step_text}"
+
+        Please execute this step using the appropriate tools. When you're done, provide a summary of what you accomplished.
+        """
+
+        # Use agent.run() to execute the step
+        try:
+            step_result = await executor.run(step_prompt)
+
+            # Mark the step as completed after successful execution
+            await self._mark_step_completed()
+
+            return step_result
+        except Exception as e:
+            logger.error(f"Error executing step {self.current_step_index}: {e}")
+            return f"Error executing step {self.current_step_index}: {str(e)}"
+
+    async def _mark_step_completed(self) -> None:
+        """Mark the current step as completed."""
+        if self.current_step_index is None:
+            return
+
+        try:
+            # Mark the step as completed
+            await self.planning_tool.execute(
+                command="mark_step",
+                plan_id=self.active_plan_id,
+                step_index=self.current_step_index,
+                step_status="completed",
+            )
+            logger.info(
+                f"Marked step {self.current_step_index} as completed in plan {self.active_plan_id}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to update plan status: {e}")
+            # Update step status directly in planning tool storage
+            if self.active_plan_id in self.planning_tool._plans:
+                plan_data = self.planning_tool._plans[self.active_plan_id]
+                step_status = plan_data.get("step_status", {})
+                step_status[str(self.current_step_index)] = "completed"
+                plan_data["step_status"] = step_status
+                plan_data["updated_at"] = time.time()
+
+    async def _get_plan_text(self) -> str:
+        """Get the current plan as formatted text."""
+        try:
+            result = await self.planning_tool.execute(
+                command="get", plan_id=self.active_plan_id
+            )
+            return result.output if hasattr(result, "output") else str(result)
+        except Exception as e:
+            logger.error(f"Error getting plan: {e}")
+            return self._generate_plan_text_from_storage()
+
+    def _generate_plan_text_from_storage(self) -> str:
+        """Generate plan text directly from storage if the planning tool fails."""
+        try:
+            if self.active_plan_id not in self.planning_tool._plans:
+                return f"Error: Plan with ID {self.active_plan_id} not found"
+
+            plan_data = self.planning_tool._plans[self.active_plan_id]
+            title = plan_data.get("title", "Untitled Plan")
+            description = plan_data.get("description", "")
+            steps = plan_data.get("steps", [])
+            step_status = plan_data.get("step_status", {})
+
+            # Count steps by status
+            status_counts = {
+                "completed": 0,
+                "in_progress": 0,
+                "blocked": 0,
+                "not_started": 0,
+            }
+            for status in step_status.values():
+                if status in status_counts:
+                    status_counts[status] += 1
+
+            completed = status_counts["completed"]
+            total = len(steps)
+            progress = (completed / total) * 100 if total > 0 else 0
+
+            plan_text = f"Plan: {title} (ID: {self.active_plan_id})\n"
+            plan_text += "=" * len(plan_text) + "\n\n"
+            plan_text += f"{description}\n\n" if description else ""
+            plan_text += (
+                f"Progress: {completed}/{total} steps completed ({progress:.1f}%)\n"
+            )
+            plan_text += f"Status: {status_counts['completed']} completed, {status_counts['in_progress']} in progress, "
+            plan_text += f"{status_counts['blocked']} blocked, {status_counts['not_started']} not started\n\n"
+            plan_text += "Steps:\n"
+
+            for i, step in enumerate(steps):
+                status = step_status.get(str(i), "not_started")
+                if status == "completed":
+                    status_mark = "[✓]"
+                elif status == "in_progress":
+                    status_mark = "[→]"
+                elif status == "blocked":
+                    status_mark = "[!]"
+                else:  # not_started
+                    status_mark = "[ ]"
+
+                plan_text += f"{i}. {status_mark} {step}\n"
+
+            return plan_text
+        except Exception as e:
+            logger.error(f"Error generating plan text from storage: {e}")
+            return f"Error: Unable to retrieve plan with ID {self.active_plan_id}"
+
+    async def _get_plan(self) -> dict:
+        """Get the current plan as a dictionary."""
+        if (
+            not self.active_plan_id
+            or self.active_plan_id not in self.planning_tool._plans
+        ):
+            return {}
+        return self.planning_tool._plans[self.active_plan_id]
+
+    async def _finalize_plan(self) -> str:
+        """Finalize the plan and provide a summary using an appropriate agent."""
+        agent = self.planner if self.planner else self.primary_agent
+        plan_text = await self._get_plan_text()
+
+        # Create a summary prompt
+        summary_prompt = f"""
+        The plan has been completed. Here is the final plan status:
+
+        {plan_text}
+
+        Please provide a summary of what was accomplished and any final thoughts.
+        """
+
+        # Use agent.run() to generate the summary
+        try:
+            summary = await agent.run(summary_prompt)
+            return f"Plan completed:\n\n{summary}"
+        except Exception as e:
+            logger.error(f"Error finalizing plan: {e}")
+            return "Plan completed. Error generating summary."
--- a/app/llm.py
+++ b/app/llm.py
@ -0,0 +1,254 @@
+from typing import Dict, List, Literal, Optional, Union
+
+from openai import (
+    APIError,
+    AsyncOpenAI,
+    AuthenticationError,
+    OpenAIError,
+    RateLimitError,
+)
+from tenacity import retry, stop_after_attempt, wait_random_exponential
+
+from app.config import LLMSettings, config
+from app.logger import logger  # Assuming a logger is set up in your app
+from app.schema import Message
+
+
+class LLM:
+    _instances: Dict[str, "LLM"] = {}
+
+    def __new__(
+        cls, config_name: str = "default", llm_config: Optional[LLMSettings] = None
+    ):
+        if config_name not in cls._instances:
+            instance = super().__new__(cls)
+            instance.__init__(config_name, llm_config)
+            cls._instances[config_name] = instance
+        return cls._instances[config_name]
+
+    def __init__(
+        self, config_name: str = "default", llm_config: Optional[LLMSettings] = None
+    ):
+        if not hasattr(self, "client"):  # Only initialize if not already initialized
+            llm_config = llm_config or config.llm
+            llm_config = llm_config.get(config_name, llm_config["default"])
+            self.model = llm_config.model
+            self.max_tokens = llm_config.max_tokens
+            self.temperature = llm_config.temperature
+            self.client = AsyncOpenAI(
+                api_key=llm_config.api_key, base_url=llm_config.base_url
+            )
+
+    @staticmethod
+    def format_messages(messages: List[Union[dict, Message]]) -> List[dict]:
+        """
+        Format messages for LLM by converting them to OpenAI message format.
+
+        Args:
+            messages: List of messages that can be either dict or Message objects
+
+        Returns:
+            List[dict]: List of formatted messages in OpenAI format
+
+        Raises:
+            ValueError: If messages are invalid or missing required fields
+            TypeError: If unsupported message types are provided
+
+        Examples:
+            >>> msgs = [
+            ...     Message.system_message("You are a helpful assistant"),
+            ...     {"role": "user", "content": "Hello"},
+            ...     Message.user_message("How are you?")
+            ... ]
+            >>> formatted = LLM.format_messages(msgs)
+        """
+        formatted_messages = []
+
+        for message in messages:
+            if isinstance(message, dict):
+                # If message is already a dict, ensure it has required fields
+                if "role" not in message:
+                    raise ValueError("Message dict must contain 'role' field")
+                formatted_messages.append(message)
+            elif isinstance(message, Message):
+                # If message is a Message object, convert it to dict
+                formatted_messages.append(message.to_dict())
+            else:
+                raise TypeError(f"Unsupported message type: {type(message)}")
+
+        # Validate all messages have required fields
+        for msg in formatted_messages:
+            if msg["role"] not in ["system", "user", "assistant", "tool"]:
+                raise ValueError(f"Invalid role: {msg['role']}")
+            if "content" not in msg and "tool_calls" not in msg:
+                raise ValueError(
+                    "Message must contain either 'content' or 'tool_calls'"
+                )
+
+        return formatted_messages
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=60),
+        stop=stop_after_attempt(6),
+    )
+    async def ask(
+        self,
+        messages: List[Union[dict, Message]],
+        system_msgs: Optional[List[Union[dict, Message]]] = None,
+        stream: bool = True,
+        temperature: Optional[float] = None,
+    ) -> str:
+        """
+        Send a prompt to the LLM and get the response.
+
+        Args:
+            messages: List of conversation messages
+            system_msgs: Optional system messages to prepend
+            stream (bool): Whether to stream the response
+            temperature (float): Sampling temperature for the response
+
+        Returns:
+            str: The generated response
+
+        Raises:
+            ValueError: If messages are invalid or response is empty
+            OpenAIError: If API call fails after retries
+            Exception: For unexpected errors
+        """
+        try:
+            # Format system and user messages
+            if system_msgs:
+                system_msgs = self.format_messages(system_msgs)
+                messages = system_msgs + self.format_messages(messages)
+            else:
+                messages = self.format_messages(messages)
+
+            if not stream:
+                # Non-streaming request
+                response = await self.client.chat.completions.create(
+                    model=self.model,
+                    messages=messages,
+                    max_tokens=self.max_tokens,
+                    temperature=temperature or self.temperature,
+                    stream=False,
+                )
+                if not response.choices or not response.choices[0].message.content:
+                    raise ValueError("Empty or invalid response from LLM")
+                return response.choices[0].message.content
+
+            # Streaming request
+            response = await self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                max_tokens=self.max_tokens,
+                temperature=temperature or self.temperature,
+                stream=True,
+            )
+
+            collected_messages = []
+            async for chunk in response:
+                chunk_message = chunk.choices[0].delta.content or ""
+                collected_messages.append(chunk_message)
+                print(chunk_message, end="", flush=True)
+
+            print()  # Newline after streaming
+            full_response = "".join(collected_messages).strip()
+            if not full_response:
+                raise ValueError("Empty response from streaming LLM")
+            return full_response
+
+        except ValueError as ve:
+            logger.error(f"Validation error: {ve}")
+            raise
+        except OpenAIError as oe:
+            logger.error(f"OpenAI API error: {oe}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error in ask: {e}")
+            raise
+
+    @retry(
+        wait=wait_random_exponential(min=1, max=60),
+        stop=stop_after_attempt(6),
+    )
+    async def ask_tool(
+        self,
+        messages: List[Union[dict, Message]],
+        system_msgs: Optional[List[Union[dict, Message]]] = None,
+        timeout: int = 60,
+        tools: Optional[List[dict]] = None,
+        tool_choice: Literal["none", "auto", "required"] = "auto",
+        temperature: Optional[float] = None,
+        **kwargs,
+    ):
+        """
+        Ask LLM using functions/tools and return the response.
+
+        Args:
+            messages: List of conversation messages
+            system_msgs: Optional system messages to prepend
+            timeout: Request timeout in seconds
+            tools: List of tools to use
+            tool_choice: Tool choice strategy
+            temperature: Sampling temperature for the response
+            **kwargs: Additional completion arguments
+
+        Returns:
+            ChatCompletionMessage: The model's response
+
+        Raises:
+            ValueError: If tools, tool_choice, or messages are invalid
+            OpenAIError: If API call fails after retries
+            Exception: For unexpected errors
+        """
+        try:
+            # Validate tool_choice
+            if tool_choice not in ["none", "auto", "required"]:
+                raise ValueError(f"Invalid tool_choice: {tool_choice}")
+
+            # Format messages
+            if system_msgs:
+                system_msgs = self.format_messages(system_msgs)
+                messages = system_msgs + self.format_messages(messages)
+            else:
+                messages = self.format_messages(messages)
+
+            # Validate tools if provided
+            if tools:
+                for tool in tools:
+                    if not isinstance(tool, dict) or "type" not in tool:
+                        raise ValueError("Each tool must be a dict with 'type' field")
+
+            # Set up the completion request
+            response = await self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                temperature=temperature or self.temperature,
+                max_tokens=self.max_tokens,
+                tools=tools,
+                tool_choice=tool_choice,
+                timeout=timeout,
+                **kwargs,
+            )
+
+            # Check if response is valid
+            if not response.choices or not response.choices[0].message:
+                print(response)
+                raise ValueError("Invalid or empty response from LLM")
+
+            return response.choices[0].message
+
+        except ValueError as ve:
+            logger.error(f"Validation error in ask_tool: {ve}")
+            raise
+        except OpenAIError as oe:
+            if isinstance(oe, AuthenticationError):
+                logger.error("Authentication failed. Check API key.")
+            elif isinstance(oe, RateLimitError):
+                logger.error("Rate limit exceeded. Consider increasing retry attempts.")
+            elif isinstance(oe, APIError):
+                logger.error(f"API error: {oe}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error in ask_tool: {e}")
+            raise
--- a/app/logger.py
+++ b/app/logger.py
@ -0,0 +1,42 @@
+import sys
+from datetime import datetime
+
+from loguru import logger as _logger
+
+from app.config import PROJECT_ROOT
+
+
+_print_level = "INFO"
+
+
+def define_log_level(print_level="INFO", logfile_level="DEBUG", name: str = None):
+    """Adjust the log level to above level"""
+    global _print_level
+    _print_level = print_level
+
+    current_date = datetime.now()
+    formatted_date = current_date.strftime("%Y%m%d")
+    log_name = (
+        f"{name}_{formatted_date}" if name else formatted_date
+    )  # name a log with prefix name
+
+    _logger.remove()
+    _logger.add(sys.stderr, level=print_level)
+    _logger.add(PROJECT_ROOT / f"logs/{log_name}.txt", level=logfile_level)
+    return _logger
+
+
+logger = define_log_level()
+
+
+if __name__ == "__main__":
+    logger.info("Starting application")
+    logger.debug("Debug message")
+    logger.warning("Warning message")
+    logger.error("Error message")
+    logger.critical("Critical message")
+
+    try:
+        raise ValueError("Test error")
+    except Exception as e:
+        logger.exception(f"An error occurred: {e}")
--- a/app/loop.py
+++ b/app/loop.py
@ -0,0 +1,21 @@
+from typing import List, Optional
+
+from app.agent.base import BaseAgent
+from app.flow.base import FlowType
+from app.flow.flow_factory import FlowFactory
+from app.tool import BaseTool, ToolCollection
+
+
+async def loop(
+    agent: BaseAgent,
+    tools: Optional[List[BaseTool]] = None,
+    flow_type: FlowType = FlowType.PLANNING,
+    input_text: str = "",
+    **loop_kwargs,
+) -> str:
+    """Main entry point for running an agent with specified flow type"""
+    tool_collection = ToolCollection(*tools) if tools else None
+    flow = FlowFactory.create_flow(
+        flow_type, agent, tool_collection=tool_collection, **loop_kwargs
+    )
+    return await flow.execute(input_text)
--- a/app/prompt/init.py
+++ b/app/prompt/init.py
--- a/app/prompt/swe.py
+++ b/app/prompt/swe.py
@ -0,0 +1,72 @@
+SYSTEM_PROMPT = """SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
+
+The special interface consists of a file editor that shows you {{WINDOW}} lines of a file at a time.
+In addition to typical bash commands, you can also use specific commands to help you navigate and edit files.
+To call a command, you need to invoke it with a function call/tool call.
+
+Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION.
+If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+
+RESPONSE FORMAT:
+Your shell prompt is formatted as follows:
+(Open file: <path>)
+(Current directory: <cwd>)
+bash-$
+
+First, you should _always_ include a general thought about what you're going to do next.
+Then, for every response, you must include exactly _ONE_ tool call/function call.
+
+Remember, you should always include a _SINGLE_ tool call/function call and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
+If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool call, and then after receiving a response you'll be able to issue the second tool call.
+Note that the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
+"""
+
+NEXT_STEP_TEMPLATE = """{{observation}}
+(Open file: {{open_file}})
+(Current directory: {{working_dir}})
+bash-$
+"""
+
+NEXT_STEP_NO_OUTPUT_TEMPLATE = """Your command ran successfully and did not produce any output.
+(Open file: {{open_file}})
+(Current directory: {{working_dir}})
+bash-$
+"""
+
+INSTANCE_TEMPLATE = """We're currently solving the following issue within our repository. Here's the issue text:
+ISSUE:
+{{problem_statement}}
+
+INSTRUCTIONS:
+Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
+Remember, YOU SHOULD ALWAYS INCLUDE EXACTLY ONE TOOL CALL/FUNCTION CALL PER RESPONSE.
+When you're satisfied with all of the changes you've made, you can submit your changes to the code base by simply running the submit command.
+Note however that you cannot use any interactive session commands (e.g. python, vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with the python <script_name>.py`.
+
+NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
+
+IMPORTANT TIPS:
+1. Always start by trying to replicate the bug that the issues discusses.
+If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
+Then start trying to fix it.
+When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
+
+If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
+so that you can be sure that the script indeed ran fine all the way through.
+
+2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
+
+3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
+
+4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file "buggy-input.png" If that doesn't work, use the linux 'find' command.
+
+5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current  open file.
+
+6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
+
+7. Do not try to install any packages with `pip`, `conda`, or any other way. This will usually not work. If the environment is not set up correctly, try to fix the issue without executing python code or running any tests that require the package installed.
+
+
+(Open file: {{open_file}})
+(Current directory: {{working_dir}})
+bash-$"""
--- a/app/prompt/toolcall.py
+++ b/app/prompt/toolcall.py
@ -0,0 +1,5 @@
+SYSTEM_PROMPT = "You are an agent that can execute tool calls"
+
+NEXT_STEP_PROMPT = (
+    "If you want to stop interaction, use `terminate` tool/function call."
+)
--- a/app/runtime/init.py
+++ b/app/runtime/init.py
--- a/app/runtime/browser_env.py
+++ b/app/runtime/browser_env.py
@ -0,0 +1,259 @@
+import atexit
+import base64
+import io
+import json
+import multiprocessing
+import platform
+import time
+import uuid
+
+import browsergym.core  # noqa F401 (we register the openended task as a gym environment)
+import gymnasium as gym
+import html2text
+import numpy as np
+import tenacity
+from browsergym.utils.obs import flatten_dom_to_str
+from PIL import Image
+
+from app.exceptions import BrowserException
+from app.logger import logger
+from app.utils.shutdown_listener import should_continue, should_exit
+
+
+BROWSER_EVAL_GET_GOAL_ACTION = "GET_EVAL_GOAL"
+BROWSER_EVAL_GET_REWARDS_ACTION = "GET_EVAL_REWARDS"
+
+
+class BrowserEnv:
+    def __init__(self, browsergym_eval_env: str | None = None, headless: bool = False):
+        """
+        Initialize the browser environment.
+
+        Args:
+            browsergym_eval_env: Optional evaluation environment name
+            headless: Whether to run the browser in headless mode (no UI)
+        """
+        self.html_text_converter = self.get_html_text_converter()
+        self.eval_mode = False
+        self.eval_dir = ""
+        self.browsergym_eval_env = browsergym_eval_env
+        self.eval_mode = bool(browsergym_eval_env)
+        self.headless = headless
+
+        # Set multiprocessing start method
+        if platform.system() == "Windows":
+            multiprocessing.set_start_method("spawn", force=True)
+        else:
+            multiprocessing.set_start_method("fork", force=True)
+
+        self.browser_side, self.agent_side = multiprocessing.Pipe()
+        self.process = None  # Initialize process as None
+        self.init_browser()
+        atexit.register(self.close)
+
+    def get_html_text_converter(self):
+        html_text_converter = html2text.HTML2Text()
+        # ignore links and images
+        html_text_converter.ignore_links = False
+        html_text_converter.ignore_images = True
+        # use alt text for images
+        html_text_converter.images_to_alt = True
+        # disable auto text wrapping
+        html_text_converter.body_width = 0
+        return html_text_converter
+
+    @tenacity.retry(
+        wait=tenacity.wait_fixed(1),
+        stop=tenacity.stop_after_attempt(5),
+        retry=tenacity.retry_if_exception_type(BrowserException),
+    )
+    def init_browser(self):
+        logger.debug(f"Starting browser env (headless: {self.headless})...")
+        try:
+            self.process = multiprocessing.Process(
+                target=self.browser_process, args=(self.headless,)
+            )
+            self.process.start()
+        except Exception as e:
+            logger.error(f"Failed to start browser process: {e}")
+            if self.process is not None:
+                self.process.terminate()
+            raise BrowserException("Failed to start browser environment.")
+
+        if not self.check_alive():
+            self.close()
+            raise BrowserException("Failed to start browser environment.")
+
+    def browser_process(self, headless: bool):
+        if self.eval_mode:
+            assert self.browsergym_eval_env is not None
+            logger.debug("Initializing browser env for web browsing evaluation.")
+            if "webarena" in self.browsergym_eval_env:
+                import browsergym.webarena  # noqa F401 register webarena tasks as gym environments
+            elif "miniwob" in self.browsergym_eval_env:
+                import browsergym.miniwob  # noqa F401 register miniwob tasks as gym environments
+            else:
+                raise ValueError(
+                    f"Unsupported browsergym eval env: {self.browsergym_eval_env}"
+                )
+            env = gym.make(
+                self.browsergym_eval_env,
+                tags_to_mark="all",
+                headless=headless,
+            )
+        else:
+            env = gym.make(
+                "browsergym/openended",
+                task_kwargs={"start_url": "about:blank", "goal": "PLACEHOLDER_GOAL"},
+                wait_for_user_message=False,
+                headless=headless,
+                disable_env_checker=True,
+                tags_to_mark="all",
+            )
+
+        obs, info = env.reset()
+
+        # EVAL ONLY: save the goal into file for evaluation
+        self.eval_goal = None
+        self.eval_rewards: list[float] = [0]
+        if self.eval_mode:
+            logger.debug(f"Browsing goal: {obs['goal']}")
+            self.eval_goal = obs["goal"]
+
+        logger.debug(
+            f"Browser env started in {'headless' if headless else 'visible'} mode."
+        )
+        while should_continue():
+            try:
+                if self.browser_side.poll(timeout=0.01):
+                    unique_request_id, action_data = self.browser_side.recv()
+
+                    # shutdown the browser environment
+                    if unique_request_id == "SHUTDOWN":
+                        logger.debug("SHUTDOWN recv, shutting down browser env...")
+                        env.close()
+                        return
+                    elif unique_request_id == "IS_ALIVE":
+                        self.browser_side.send(("ALIVE", None))
+                        continue
+
+                    # EVAL ONLY: Get evaluation info
+                    if action_data["action"] == BROWSER_EVAL_GET_GOAL_ACTION:
+                        self.browser_side.send(
+                            (unique_request_id, {"text_content": self.eval_goal})
+                        )
+                        continue
+                    elif action_data["action"] == BROWSER_EVAL_GET_REWARDS_ACTION:
+                        self.browser_side.send(
+                            (
+                                unique_request_id,
+                                {"text_content": json.dumps(self.eval_rewards)},
+                            )
+                        )
+                        continue
+
+                    action = action_data["action"]
+                    obs, reward, terminated, truncated, info = env.step(action)
+
+                    # EVAL ONLY: Save the rewards into file for evaluation
+                    if self.eval_mode:
+                        self.eval_rewards.append(reward)
+
+                    # add text content of the page
+                    html_str = flatten_dom_to_str(obs["dom_object"])
+                    obs["text_content"] = self.html_text_converter.handle(html_str)
+                    # make observation serializable
+                    obs["screenshot"] = self.image_to_png_base64_url(obs["screenshot"])
+                    obs["active_page_index"] = obs["active_page_index"].item()
+                    obs["elapsed_time"] = obs["elapsed_time"].item()
+                    self.browser_side.send((unique_request_id, obs))
+            except KeyboardInterrupt:
+                logger.debug("Browser env process interrupted by user.")
+                try:
+                    env.close()
+                except Exception:
+                    pass
+                return
+
+    def step(self, action_str: str, timeout: float = 30) -> dict:
+        """Execute an action in the browser environment and return the observation."""
+        unique_request_id = str(uuid.uuid4())
+        self.agent_side.send((unique_request_id, {"action": action_str}))
+        start_time = time.time()
+        while True:
+            if should_exit() or time.time() - start_time > timeout:
+                raise TimeoutError("Browser environment took too long to respond.")
+            if self.agent_side.poll(timeout=0.01):
+                response_id, obs = self.agent_side.recv()
+                if response_id == unique_request_id:
+                    return obs
+
+    def check_alive(self, timeout: float = 60):
+        self.agent_side.send(("IS_ALIVE", None))
+        if self.agent_side.poll(timeout=timeout):
+            response_id, _ = self.agent_side.recv()
+            if response_id == "ALIVE":
+                return True
+            logger.debug(f"Browser env is not alive. Response ID: {response_id}")
+
+    def close(self):
+        if (
+            not hasattr(self, "process")
+            or self.process is None
+            or not self.process.is_alive()
+        ):
+            return
+        try:
+            self.agent_side.send(("SHUTDOWN", None))
+            self.process.join(5)  # Wait for the process to terminate
+            if self.process.is_alive():
+                logger.error(
+                    "Browser process did not terminate, forcefully terminating..."
+                )
+                self.process.terminate()
+                self.process.join(5)  # Wait for the process to terminate
+                if self.process.is_alive():
+                    self.process.kill()
+                    self.process.join(5)  # Wait for the process to terminate
+            self.agent_side.close()
+            self.browser_side.close()
+        except Exception:
+            logger.error("Encountered an error when closing browser env", exc_info=True)
+
+    @staticmethod
+    def image_to_png_base64_url(
+        image: np.ndarray | Image.Image, add_data_prefix: bool = False
+    ):
+        """Convert a numpy array to a base64 encoded png image url."""
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        if image.mode in ("RGBA", "LA"):
+            image = image.convert("RGB")
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+
+        image_base64 = base64.b64encode(buffered.getvalue()).decode()
+        return (
+            f"data:image/png;base64,{image_base64}"
+            if add_data_prefix
+            else f"{image_base64}"
+        )
+
+    @staticmethod
+    def image_to_jpg_base64_url(
+        image: np.ndarray | Image.Image, add_data_prefix: bool = False
+    ):
+        """Convert a numpy array to a base64 encoded jpeg image url."""
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        if image.mode in ("RGBA", "LA"):
+            image = image.convert("RGB")
+        buffered = io.BytesIO()
+        image.save(buffered, format="JPEG")
+
+        image_base64 = base64.b64encode(buffered.getvalue()).decode()
+        return (
+            f"data:image/jpeg;base64,{image_base64}"
+            if add_data_prefix
+            else f"{image_base64}"
+        )
--- a/app/schema.py
+++ b/app/schema.py
@ -0,0 +1,135 @@
+from enum import Enum
+from typing import Any, List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+
+
+class AgentState(str, Enum):
+    """Agent execution states"""
+
+    IDLE = "IDLE"
+    RUNNING = "RUNNING"
+    FINISHED = "FINISHED"
+    ERROR = "ERROR"
+
+
+class Function(BaseModel):
+    name: str
+    arguments: str
+
+
+class ToolCall(BaseModel):
+    """Represents a tool/function call in a message"""
+
+    id: str
+    type: str = "function"
+    function: Function
+
+
+class Message(BaseModel):
+    """Represents a chat message in the conversation"""
+
+    role: Literal["system", "user", "assistant", "tool"] = Field(...)
+    content: Optional[str] = Field(default=None)
+    tool_calls: Optional[List[ToolCall]] = Field(default=None)
+    name: Optional[str] = Field(default=None)
+    tool_call_id: Optional[str] = Field(default=None)
+
+    def __add__(self, other) -> List["Message"]:
+        """支持 Message + list 或 Message + Message 的操作"""
+        if isinstance(other, list):
+            return [self] + other
+        elif isinstance(other, Message):
+            return [self, other]
+        else:
+            raise TypeError(
+                f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'"
+            )
+
+    def __radd__(self, other) -> List["Message"]:
+        """支持 list + Message 的操作"""
+        if isinstance(other, list):
+            return other + [self]
+        else:
+            raise TypeError(
+                f"unsupported operand type(s) for +: '{type(other).__name__}' and '{type(self).__name__}'"
+            )
+
+    def to_dict(self) -> dict:
+        """Convert message to dictionary format"""
+        message = {"role": self.role}
+        if self.content is not None:
+            message["content"] = self.content
+        if self.tool_calls is not None:
+            message["tool_calls"] = [tool_call.dict() for tool_call in self.tool_calls]
+        if self.name is not None:
+            message["name"] = self.name
+        if self.tool_call_id is not None:
+            message["tool_call_id"] = self.tool_call_id
+        return message
+
+    @classmethod
+    def user_message(cls, content: str) -> "Message":
+        """Create a user message"""
+        return cls(role="user", content=content)
+
+    @classmethod
+    def system_message(cls, content: str) -> "Message":
+        """Create a system message"""
+        return cls(role="system", content=content)
+
+    @classmethod
+    def assistant_message(cls, content: Optional[str] = None) -> "Message":
+        """Create an assistant message"""
+        return cls(role="assistant", content=content)
+
+    @classmethod
+    def tool_message(cls, content: str, name, tool_call_id: str) -> "Message":
+        """Create a tool message"""
+        return cls(role="tool", content=content, name=name, tool_call_id=tool_call_id)
+
+    @classmethod
+    def from_tool_calls(
+        cls, tool_calls: List[Any], content: Union[str, List[str]] = "", **kwargs
+    ):
+        """Create ToolCallsMessage from raw tool calls.
+
+        Args:
+            tool_calls: Raw tool calls from LLM
+            content: Optional message content
+        """
+        formatted_calls = [
+            {"id": call.id, "function": call.function.model_dump(), "type": "function"}
+            for call in tool_calls
+        ]
+        return cls(
+            role="assistant", content=content, tool_calls=formatted_calls, **kwargs
+        )
+
+
+class Memory(BaseModel):
+    messages: List[Message] = Field(default_factory=list)
+    max_messages: int = Field(default=100)
+
+    def add_message(self, message: Message) -> None:
+        """Add a message to memory"""
+        self.messages.append(message)
+        # Optional: Implement message limit
+        if len(self.messages) > self.max_messages:
+            self.messages = self.messages[-self.max_messages :]
+
+    def add_messages(self, messages: List[Message]) -> None:
+        """Add multiple messages to memory"""
+        self.messages.extend(messages)
+
+    def clear(self) -> None:
+        """Clear all messages"""
+        self.messages.clear()
+
+    def get_recent_messages(self, n: int) -> List[Message]:
+        """Get n most recent messages"""
+        return self.messages[-n:]
+
+    def to_dict_list(self) -> List[dict]:
+        """Convert messages to list of dicts"""
+        return [msg.to_dict() for msg in self.messages]
--- a/app/tool/init.py
+++ b/app/tool/init.py
@ -0,0 +1,18 @@
+from app.tool.base import BaseTool
+from app.tool.bash import Bash
+from app.tool.create_chat_completion import CreateChatCompletion
+from app.tool.planning import PlanningTool
+from app.tool.str_replace_editor import StrReplaceEditor
+from app.tool.terminate import Terminate
+from app.tool.tool_collection import ToolCollection
+
+
+__all__ = [
+    "BaseTool",
+    "Bash",
+    "Terminate",
+    "StrReplaceEditor",
+    "ToolCollection",
+    "CreateChatCompletion",
+    "PlanningTool",
+]
--- a/app/tool/base.py
+++ b/app/tool/base.py
@ -0,0 +1,82 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+
+class BaseTool(ABC, BaseModel):
+    name: str
+    description: str
+    parameters: Optional[dict] = None
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    async def __call__(self, **kwargs) -> Any:
+        """Execute the tool with given parameters."""
+        return await self.execute(**kwargs)
+
+    @abstractmethod
+    async def execute(self, **kwargs) -> Any:
+        """Execute the tool with given parameters."""
+
+    def to_param(self) -> Dict:
+        """Convert tool to function call format."""
+        return {
+            "type": "function",
+            "function": {
+                "name": self.name,
+                "description": self.description,
+                "parameters": self.parameters,
+            },
+        }
+
+
+class ToolResult(BaseModel):
+    """Represents the result of a tool execution."""
+
+    output: Any = Field(default=None)
+    error: Optional[str] = Field(default=None)
+    system: Optional[str] = Field(default=None)
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    def __bool__(self):
+        return any(getattr(self, field) for field in self.__fields__)
+
+    def __add__(self, other: "ToolResult"):
+        def combine_fields(
+            field: Optional[str], other_field: Optional[str], concatenate: bool = True
+        ):
+            if field and other_field:
+                if concatenate:
+                    return field + other_field
+                raise ValueError("Cannot combine tool results")
+            return field or other_field
+
+        return ToolResult(
+            output=combine_fields(self.output, other.output),
+            error=combine_fields(self.error, other.error),
+            system=combine_fields(self.system, other.system),
+        )
+
+    def __str__(self):
+        return f"Error: {self.error}" if self.error else self.output
+
+    def replace(self, **kwargs):
+        """Returns a new ToolResult with the given fields replaced."""
+        # return self.copy(update=kwargs)
+        return type(self)(**{**self.dict(), **kwargs})
+
+
+class CLIResult(ToolResult):
+    """A ToolResult that can be rendered as a CLI output."""
+
+
+class ToolFailure(ToolResult):
+    """A ToolResult that represents a failure."""
+
+
+class AgentAwareTool:
+    agent: Optional = None
--- a/app/tool/bash.py
+++ b/app/tool/bash.py
@ -0,0 +1,158 @@
+import asyncio
+import os
+from typing import Optional
+
+from app.exceptions import ToolError
+from app.tool.base import BaseTool, CLIResult, ToolResult
+
+
+_BASH_DESCRIPTION = """Execute a bash command in the terminal.
+* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
+* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
+* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
+"""
+
+
+class _BashSession:
+    """A session of a bash shell."""
+
+    _started: bool
+    _process: asyncio.subprocess.Process
+
+    command: str = "/bin/bash"
+    _output_delay: float = 0.2  # seconds
+    _timeout: float = 120.0  # seconds
+    _sentinel: str = "<<exit>>"
+
+    def __init__(self):
+        self._started = False
+        self._timed_out = False
+
+    async def start(self):
+        if self._started:
+            return
+
+        self._process = await asyncio.create_subprocess_shell(
+            self.command,
+            preexec_fn=os.setsid,
+            shell=True,
+            bufsize=0,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+
+        self._started = True
+
+    def stop(self):
+        """Terminate the bash shell."""
+        if not self._started:
+            raise ToolError("Session has not started.")
+        if self._process.returncode is not None:
+            return
+        self._process.terminate()
+
+    async def run(self, command: str):
+        """Execute a command in the bash shell."""
+        if not self._started:
+            raise ToolError("Session has not started.")
+        if self._process.returncode is not None:
+            return ToolResult(
+                system="tool must be restarted",
+                error=f"bash has exited with returncode {self._process.returncode}",
+            )
+        if self._timed_out:
+            raise ToolError(
+                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
+            )
+
+        # we know these are not None because we created the process with PIPEs
+        assert self._process.stdin
+        assert self._process.stdout
+        assert self._process.stderr
+
+        # send command to the process
+        self._process.stdin.write(
+            command.encode() + f"; echo '{self._sentinel}'\n".encode()
+        )
+        await self._process.stdin.drain()
+
+        # read output from the process, until the sentinel is found
+        try:
+            async with asyncio.timeout(self._timeout):
+                while True:
+                    await asyncio.sleep(self._output_delay)
+                    # if we read directly from stdout/stderr, it will wait forever for
+                    # EOF. use the StreamReader buffer directly instead.
+                    output = (
+                        self._process.stdout._buffer.decode()
+                    )  # pyright: ignore[reportAttributeAccessIssue]
+                    if self._sentinel in output:
+                        # strip the sentinel and break
+                        output = output[: output.index(self._sentinel)]
+                        break
+        except asyncio.TimeoutError:
+            self._timed_out = True
+            raise ToolError(
+                f"timed out: bash has not returned in {self._timeout} seconds and must be restarted",
+            ) from None
+
+        if output.endswith("\n"):
+            output = output[:-1]
+
+        error = (
+            self._process.stderr._buffer.decode()
+        )  # pyright: ignore[reportAttributeAccessIssue]
+        if error.endswith("\n"):
+            error = error[:-1]
+
+        # clear the buffers so that the next output can be read correctly
+        self._process.stdout._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
+        self._process.stderr._buffer.clear()  # pyright: ignore[reportAttributeAccessIssue]
+
+        return CLIResult(output=output, error=error)
+
+
+class Bash(BaseTool):
+    """A tool for executing bash commands"""
+
+    name: str = "bash"
+    description: str = _BASH_DESCRIPTION
+    parameters: dict = {
+        "type": "object",
+        "properties": {
+            "command": {
+                "type": "string",
+                "description": "The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.",
+            },
+        },
+        "required": ["command"],
+    }
+
+    _session: Optional[_BashSession] = None
+
+    async def execute(
+        self, command: str | None = None, restart: bool = False, **kwargs
+    ) -> CLIResult:
+        if restart:
+            if self._session:
+                self._session.stop()
+            self._session = _BashSession()
+            await self._session.start()
+
+            return ToolResult(system="tool has been restarted.")
+
+        if self._session is None:
+            self._session = _BashSession()
+            await self._session.start()
+
+        if command is not None:
+            return await self._session.run(command)
+
+        raise ToolError("no command provided.")
+
+
+if __name__ == "__main__":
+    bash = Bash()
+    rst = asyncio.run(bash.execute("ls -l"))
+    print(rst)
--- a/app/tool/create_chat_completion.py
+++ b/app/tool/create_chat_completion.py
@ -0,0 +1,169 @@
+from typing import Any, List, Optional, Type, Union, get_args, get_origin
+
+from pydantic import BaseModel, Field
+
+from app.tool import BaseTool
+
+
+class CreateChatCompletion(BaseTool):
+    name: str = "create_chat_completion"
+    description: str = (
+        "Creates a structured completion with specified output formatting."
+    )
+
+    # Type mapping for JSON schema
+    type_mapping: dict = {
+        str: "string",
+        int: "integer",
+        float: "number",
+        bool: "boolean",
+        dict: "object",
+        list: "array",
+    }
+    response_type: Optional[Type] = None
+    required: List[str] = Field(default_factory=lambda: ["response"])
+
+    def __init__(self, response_type: Optional[Type] = str):
+        """Initialize with a specific response type."""
+        super().__init__()
+        self.response_type = response_type
+        self.parameters = self._build_parameters()
+
+    def _build_parameters(self) -> dict:
+        """Build parameters schema based on response type."""
+        if self.response_type == str:
+            return {
+                "type": "object",
+                "properties": {
+                    "response": {
+                        "type": "string",
+                        "description": "The response text that should be delivered to the user.",
+                    },
+                },
+                "required": self.required,
+            }
+
+        if isinstance(self.response_type, type) and issubclass(
+            self.response_type, BaseModel
+        ):
+            schema = self.response_type.model_json_schema()
+            return {
+                "type": "object",
+                "properties": schema["properties"],
+                "required": schema.get("required", self.required),
+            }
+
+        return self._create_type_schema(self.response_type)
+
+    def _create_type_schema(self, type_hint: Type) -> dict:
+        """Create a JSON schema for the given type."""
+        origin = get_origin(type_hint)
+        args = get_args(type_hint)
+
+        # Handle primitive types
+        if origin is None:
+            return {
+                "type": "object",
+                "properties": {
+                    "response": {
+                        "type": self.type_mapping.get(type_hint, "string"),
+                        "description": f"Response of type {type_hint.__name__}",
+                    }
+                },
+                "required": self.required,
+            }
+
+        # Handle List type
+        if origin is list:
+            item_type = args[0] if args else Any
+            return {
+                "type": "object",
+                "properties": {
+                    "response": {
+                        "type": "array",
+                        "items": self._get_type_info(item_type),
+                    }
+                },
+                "required": self.required,
+            }
+
+        # Handle Dict type
+        if origin is dict:
+            value_type = args[1] if len(args) > 1 else Any
+            return {
+                "type": "object",
+                "properties": {
+                    "response": {
+                        "type": "object",
+                        "additionalProperties": self._get_type_info(value_type),
+                    }
+                },
+                "required": self.required,
+            }
+
+        # Handle Union type
+        if origin is Union:
+            return self._create_union_schema(args)
+
+        return self._build_parameters()
+
+    def _get_type_info(self, type_hint: Type) -> dict:
+        """Get type information for a single type."""
+        if isinstance(type_hint, type) and issubclass(type_hint, BaseModel):
+            return type_hint.model_json_schema()
+
+        return {
+            "type": self.type_mapping.get(type_hint, "string"),
+            "description": f"Value of type {getattr(type_hint, '__name__', 'any')}",
+        }
+
+    def _create_union_schema(self, types: tuple) -> dict:
+        """Create schema for Union types."""
+        return {
+            "type": "object",
+            "properties": {
+                "response": {"anyOf": [self._get_type_info(t) for t in types]}
+            },
+            "required": self.required,
+        }
+
+    async def execute(self, required: list | None = None, **kwargs) -> Any:
+        """Execute the chat completion with type conversion.
+
+        Args:
+            required: List of required field names or None
+            **kwargs: Response data
+
+        Returns:
+            Converted response based on response_type
+        """
+        required = required or self.required
+
+        # Handle case when required is a list
+        if isinstance(required, list) and len(required) > 0:
+            if len(required) == 1:
+                required_field = required[0]
+                result = kwargs.get(required_field, "")
+            else:
+                # Return multiple fields as a dictionary
+                return {field: kwargs.get(field, "") for field in required}
+        else:
+            required_field = "response"
+            result = kwargs.get(required_field, "")
+
+        # Type conversion logic
+        if self.response_type == str:
+            return result
+
+        if isinstance(self.response_type, type) and issubclass(
+            self.response_type, BaseModel
+        ):
+            return self.response_type(**kwargs)
+
+        if get_origin(self.response_type) in (list, dict):
+            return result  # Assuming result is already in correct format
+
+        try:
+            return self.response_type(result)
+        except (ValueError, TypeError):
+            return result
--- a/app/tool/planning.py
+++ b/app/tool/planning.py
@ -0,0 +1,364 @@
+# tool/planning.py
+from typing import Dict, List, Literal, Optional
+
+from app.exceptions import ToolError
+from app.tool.base import BaseTool, ToolResult
+
+
+_PLANNING_TOOL_DESCRIPTION = """
+A planning tool that allows the agent to create and manage plans for solving complex tasks.
+The tool provides functionality for creating plans, updating plan steps, and tracking progress.
+"""
+
+
+class PlanningTool(BaseTool):
+    """
+    A planning tool that allows the agent to create and manage plans for solving complex tasks.
+    The tool provides functionality for creating plans, updating plan steps, and tracking progress.
+    """
+
+    name: str = "planning"
+    description: str = _PLANNING_TOOL_DESCRIPTION
+    parameters: dict = {
+        "type": "object",
+        "properties": {
+            "command": {
+                "description": "The command to execute. Available commands: create, update, list, get, set_active, mark_step, delete.",
+                "enum": [
+                    "create",
+                    "update",
+                    "list",
+                    "get",
+                    "set_active",
+                    "mark_step",
+                    "delete",
+                ],
+                "type": "string",
+            },
+            "plan_id": {
+                "description": "Unique identifier for the plan. Required for create, update, set_active, and delete commands. Optional for get and mark_step (uses active plan if not specified).",
+                "type": "string",
+            },
+            "title": {
+                "description": "Title for the plan. Required for create command, optional for update command.",
+                "type": "string",
+            },
+            "steps": {
+                "description": "List of plan steps. Required for create command, optional for update command.",
+                "type": "array",
+                "items": {"type": "string"},
+            },
+            "step_index": {
+                "description": "Index of the step to update (0-based). Required for mark_step command.",
+                "type": "integer",
+            },
+            "step_status": {
+                "description": "Status to set for a step. Used with mark_step command.",
+                "enum": ["not_started", "in_progress", "completed", "blocked"],
+                "type": "string",
+            },
+            "step_notes": {
+                "description": "Additional notes for a step. Optional for mark_step command.",
+                "type": "string",
+            },
+        },
+        "required": ["command"],
+        "additionalProperties": False,
+    }
+
+    _plans = {}  # Dictionary to store plans by plan_id
+    _current_plan_id = None  # Track the current active plan
+
+    # async def __call__(
+    async def execute(
+        self,
+        *,
+        command: Literal[
+            "create", "update", "list", "get", "set_active", "mark_step", "delete"
+        ],
+        plan_id: Optional[str] = None,
+        title: Optional[str] = None,
+        steps: Optional[List[str]] = None,
+        step_index: Optional[int] = None,
+        step_status: Optional[
+            Literal["not_started", "in_progress", "completed", "blocked"]
+        ] = None,
+        step_notes: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Execute the planning tool with the given command and parameters.
+
+        Parameters:
+        - command: The operation to perform
+        - plan_id: Unique identifier for the plan
+        - title: Title for the plan (used with create command)
+        - steps: List of steps for the plan (used with create command)
+        - step_index: Index of the step to update (used with mark_step command)
+        - step_status: Status to set for a step (used with mark_step command)
+        - step_notes: Additional notes for a step (used with mark_step command)
+        """
+
+        if command == "create":
+            return self._create_plan(plan_id, title, steps)
+        elif command == "update":
+            return self._update_plan(plan_id, title, steps)
+        elif command == "list":
+            return self._list_plans()
+        elif command == "get":
+            return self._get_plan(plan_id)
+        elif command == "set_active":
+            return self._set_active_plan(plan_id)
+        elif command == "mark_step":
+            return self._mark_step(plan_id, step_index, step_status, step_notes)
+        elif command == "delete":
+            return self._delete_plan(plan_id)
+        else:
+            raise ToolError(
+                f"Unrecognized command: {command}. Allowed commands are: create, update, list, get, set_active, mark_step, delete"
+            )
+
+    def _create_plan(
+        self, plan_id: Optional[str], title: Optional[str], steps: Optional[List[str]]
+    ) -> ToolResult:
+        """Create a new plan with the given ID, title, and steps."""
+        if not plan_id:
+            raise ToolError("Parameter `plan_id` is required for command: create")
+
+        if plan_id in self._plans:
+            raise ToolError(
+                f"A plan with ID '{plan_id}' already exists. Use 'update' to modify existing plans."
+            )
+
+        if not title:
+            raise ToolError("Parameter `title` is required for command: create")
+
+        if (
+            not steps
+            or not isinstance(steps, list)
+            or not all(isinstance(step, str) for step in steps)
+        ):
+            raise ToolError(
+                "Parameter `steps` must be a non-empty list of strings for command: create"
+            )
+
+        # Create a new plan with initialized step statuses
+        plan = {
+            "plan_id": plan_id,
+            "title": title,
+            "steps": steps,
+            "step_statuses": ["not_started"] * len(steps),
+            "step_notes": [""] * len(steps),
+        }
+
+        self._plans[plan_id] = plan
+        self._current_plan_id = plan_id  # Set as active plan
+
+        return ToolResult(
+            output=f"Plan created successfully with ID: {plan_id}\n\n{self._format_plan(plan)}"
+        )
+
+    def _update_plan(
+        self, plan_id: Optional[str], title: Optional[str], steps: Optional[List[str]]
+    ) -> ToolResult:
+        """Update an existing plan with new title or steps."""
+        if not plan_id:
+            raise ToolError("Parameter `plan_id` is required for command: update")
+
+        if plan_id not in self._plans:
+            raise ToolError(f"No plan found with ID: {plan_id}")
+
+        plan = self._plans[plan_id]
+
+        if title:
+            plan["title"] = title
+
+        if steps:
+            if not isinstance(steps, list) or not all(
+                isinstance(step, str) for step in steps
+            ):
+                raise ToolError(
+                    "Parameter `steps` must be a list of strings for command: update"
+                )
+
+            # Preserve existing step statuses for unchanged steps
+            old_steps = plan["steps"]
+            old_statuses = plan["step_statuses"]
+            old_notes = plan["step_notes"]
+
+            # Create new step statuses and notes
+            new_statuses = []
+            new_notes = []
+
+            for i, step in enumerate(steps):
+                # If the step exists at the same position in old steps, preserve status and notes
+                if i < len(old_steps) and step == old_steps[i]:
+                    new_statuses.append(old_statuses[i])
+                    new_notes.append(old_notes[i])
+                else:
+                    new_statuses.append("not_started")
+                    new_notes.append("")
+
+            plan["steps"] = steps
+            plan["step_statuses"] = new_statuses
+            plan["step_notes"] = new_notes
+
+        return ToolResult(
+            output=f"Plan updated successfully: {plan_id}\n\n{self._format_plan(plan)}"
+        )
+
+    def _list_plans(self) -> ToolResult:
+        """List all available plans."""
+        if not self._plans:
+            return ToolResult(
+                output="No plans available. Create a plan with the 'create' command."
+            )
+
+        output = "Available plans:\n"
+        for plan_id, plan in self._plans.items():
+            current_marker = " (active)" if plan_id == self._current_plan_id else ""
+            completed = sum(
+                1 for status in plan["step_statuses"] if status == "completed"
+            )
+            total = len(plan["steps"])
+            progress = f"{completed}/{total} steps completed"
+            output += f"• {plan_id}{current_marker}: {plan['title']} - {progress}\n"
+
+        return ToolResult(output=output)
+
+    def _get_plan(self, plan_id: Optional[str]) -> ToolResult:
+        """Get details of a specific plan."""
+        if not plan_id:
+            # If no plan_id is provided, use the current active plan
+            if not self._current_plan_id:
+                raise ToolError(
+                    "No active plan. Please specify a plan_id or set an active plan."
+                )
+            plan_id = self._current_plan_id
+
+        if plan_id not in self._plans:
+            raise ToolError(f"No plan found with ID: {plan_id}")
+
+        plan = self._plans[plan_id]
+        return ToolResult(output=self._format_plan(plan))
+
+    def _set_active_plan(self, plan_id: Optional[str]) -> ToolResult:
+        """Set a plan as the active plan."""
+        if not plan_id:
+            raise ToolError("Parameter `plan_id` is required for command: set_active")
+
+        if plan_id not in self._plans:
+            raise ToolError(f"No plan found with ID: {plan_id}")
+
+        self._current_plan_id = plan_id
+        return ToolResult(
+            output=f"Plan '{plan_id}' is now the active plan.\n\n{self._format_plan(self._plans[plan_id])}"
+        )
+
+    def _mark_step(
+        self,
+        plan_id: Optional[str],
+        step_index: Optional[int],
+        step_status: Optional[str],
+        step_notes: Optional[str],
+    ) -> ToolResult:
+        """Mark a step with a specific status and optional notes."""
+        if not plan_id:
+            # If no plan_id is provided, use the current active plan
+            if not self._current_plan_id:
+                raise ToolError(
+                    "No active plan. Please specify a plan_id or set an active plan."
+                )
+            plan_id = self._current_plan_id
+
+        if plan_id not in self._plans:
+            raise ToolError(f"No plan found with ID: {plan_id}")
+
+        if step_index is None:
+            raise ToolError("Parameter `step_index` is required for command: mark_step")
+
+        plan = self._plans[plan_id]
+
+        if step_index < 0 or step_index >= len(plan["steps"]):
+            raise ToolError(
+                f"Invalid step_index: {step_index}. Valid indices range from 0 to {len(plan['steps'])-1}."
+            )
+
+        if step_status and step_status not in [
+            "not_started",
+            "in_progress",
+            "completed",
+            "blocked",
+        ]:
+            raise ToolError(
+                f"Invalid step_status: {step_status}. Valid statuses are: not_started, in_progress, completed, blocked"
+            )
+
+        if step_status:
+            plan["step_statuses"][step_index] = step_status
+
+        if step_notes:
+            plan["step_notes"][step_index] = step_notes
+
+        return ToolResult(
+            output=f"Step {step_index} updated in plan '{plan_id}'.\n\n{self._format_plan(plan)}"
+        )
+
+    def _delete_plan(self, plan_id: Optional[str]) -> ToolResult:
+        """Delete a plan."""
+        if not plan_id:
+            raise ToolError("Parameter `plan_id` is required for command: delete")
+
+        if plan_id not in self._plans:
+            raise ToolError(f"No plan found with ID: {plan_id}")
+
+        del self._plans[plan_id]
+
+        # If the deleted plan was the active plan, clear the active plan
+        if self._current_plan_id == plan_id:
+            self._current_plan_id = None
+
+        return ToolResult(output=f"Plan '{plan_id}' has been deleted.")
+
+    def _format_plan(self, plan: Dict) -> str:
+        """Format a plan for display."""
+        output = f"Plan: {plan['title']} (ID: {plan['plan_id']})\n"
+        output += "=" * len(output) + "\n\n"
+
+        # Calculate progress statistics
+        total_steps = len(plan["steps"])
+        completed = sum(1 for status in plan["step_statuses"] if status == "completed")
+        in_progress = sum(
+            1 for status in plan["step_statuses"] if status == "in_progress"
+        )
+        blocked = sum(1 for status in plan["step_statuses"] if status == "blocked")
+        not_started = sum(
+            1 for status in plan["step_statuses"] if status == "not_started"
+        )
+
+        output += f"Progress: {completed}/{total_steps} steps completed "
+        if total_steps > 0:
+            percentage = (completed / total_steps) * 100
+            output += f"({percentage:.1f}%)\n"
+        else:
+            output += "(0%)\n"
+
+        output += f"Status: {completed} completed, {in_progress} in progress, {blocked} blocked, {not_started} not started\n\n"
+        output += "Steps:\n"
+
+        # Add each step with its status and notes
+        for i, (step, status, notes) in enumerate(
+            zip(plan["steps"], plan["step_statuses"], plan["step_notes"])
+        ):
+            status_symbol = {
+                "not_started": "[ ]",
+                "in_progress": "[→]",
+                "completed": "[✓]",
+                "blocked": "[!]",
+            }.get(status, "[ ]")
+
+            output += f"{i}. {status_symbol} {step}\n"
+            if notes:
+                output += f"   Notes: {notes}\n"
+
+        return output
--- a/app/tool/run.py
+++ b/app/tool/run.py
@ -0,0 +1,43 @@
+"""Utility to run shell commands asynchronously with a timeout."""
+
+import asyncio
+
+
+TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
+MAX_RESPONSE_LEN: int = 16000
+
+
+def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
+    """Truncate content and append a notice if content exceeds the specified length."""
+    return (
+        content
+        if not truncate_after or len(content) <= truncate_after
+        else content[:truncate_after] + TRUNCATED_MESSAGE
+    )
+
+
+async def run(
+    cmd: str,
+    timeout: float | None = 120.0,  # seconds
+    truncate_after: int | None = MAX_RESPONSE_LEN,
+):
+    """Run a shell command asynchronously with a timeout."""
+    process = await asyncio.create_subprocess_shell(
+        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+    )
+
+    try:
+        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
+        return (
+            process.returncode or 0,
+            maybe_truncate(stdout.decode(), truncate_after=truncate_after),
+            maybe_truncate(stderr.decode(), truncate_after=truncate_after),
+        )
+    except asyncio.TimeoutError as exc:
+        try:
+            process.kill()
+        except ProcessLookupError:
+            pass
+        raise TimeoutError(
+            f"Command '{cmd}' timed out after {timeout} seconds"
+        ) from exc
--- a/app/tool/str_replace_editor.py
+++ b/app/tool/str_replace_editor.py
@ -0,0 +1,342 @@
+from collections import defaultdict
+from pathlib import Path
+from typing import Literal, get_args
+
+from app.exceptions import ToolError
+from app.tool import BaseTool
+from app.tool.base import CLIResult, ToolResult
+from app.tool.run import run
+
+
+Command = Literal[
+    "view",
+    "create",
+    "str_replace",
+    "insert",
+    "undo_edit",
+]
+SNIPPET_LINES: int = 4
+
+MAX_RESPONSE_LEN: int = 16000
+
+TRUNCATED_MESSAGE: str = "<response clipped><NOTE>To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for.</NOTE>"
+
+_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
+* State is persistent across command calls and discussions with the user
+* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
+* The `create` command cannot be used if the specified `path` already exists as a file
+* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
+* The `undo_edit` command will revert the last edit made to the file at `path`
+
+Notes for using the `str_replace` command:
+* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
+* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
+* The `new_str` parameter should contain the edited lines that should replace the `old_str`
+"""
+
+
+def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN):
+    """Truncate content and append a notice if content exceeds the specified length."""
+    return (
+        content
+        if not truncate_after or len(content) <= truncate_after
+        else content[:truncate_after] + TRUNCATED_MESSAGE
+    )
+
+
+class StrReplaceEditor(BaseTool):
+    """A tool for executing bash commands"""
+
+    name: str = "str_replace_editor"
+    description: str = _STR_REPLACE_EDITOR_DESCRIPTION
+    parameters: dict = {
+        "type": "object",
+        "properties": {
+            "command": {
+                "description": "The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.",
+                "enum": ["view", "create", "str_replace", "insert", "undo_edit"],
+                "type": "string",
+            },
+            "path": {
+                "description": "Absolute path to file or directory.",
+                "type": "string",
+            },
+            "file_text": {
+                "description": "Required parameter of `create` command, with the content of the file to be created.",
+                "type": "string",
+            },
+            "old_str": {
+                "description": "Required parameter of `str_replace` command containing the string in `path` to replace.",
+                "type": "string",
+            },
+            "new_str": {
+                "description": "Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.",
+                "type": "string",
+            },
+            "insert_line": {
+                "description": "Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.",
+                "type": "integer",
+            },
+            "view_range": {
+                "description": "Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.",
+                "items": {"type": "integer"},
+                "type": "array",
+            },
+        },
+        "required": ["command", "path"],
+    }
+
+    _file_history: list = defaultdict(list)
+
+    async def execute(
+        self,
+        *,
+        command: Command,
+        path: str,
+        file_text: str | None = None,
+        view_range: list[int] | None = None,
+        old_str: str | None = None,
+        new_str: str | None = None,
+        insert_line: int | None = None,
+        **kwargs,
+    ) -> str:
+        _path = Path(path)
+        self.validate_path(command, _path)
+        if command == "view":
+            result = await self.view(_path, view_range)
+        elif command == "create":
+            if file_text is None:
+                raise ToolError("Parameter `file_text` is required for command: create")
+            self.write_file(_path, file_text)
+            self._file_history[_path].append(file_text)
+            result = ToolResult(output=f"File created successfully at: {_path}")
+        elif command == "str_replace":
+            if old_str is None:
+                raise ToolError(
+                    "Parameter `old_str` is required for command: str_replace"
+                )
+            result = self.str_replace(_path, old_str, new_str)
+        elif command == "insert":
+            if insert_line is None:
+                raise ToolError(
+                    "Parameter `insert_line` is required for command: insert"
+                )
+            if new_str is None:
+                raise ToolError("Parameter `new_str` is required for command: insert")
+            result = self.insert(_path, insert_line, new_str)
+        elif command == "undo_edit":
+            result = self.undo_edit(_path)
+        else:
+            raise ToolError(
+                f'Unrecognized command {command}. The allowed commands for the {self.name} tool are: {", ".join(get_args(Command))}'
+            )
+        return str(result)
+
+    def validate_path(self, command: str, path: Path):
+        """
+        Check that the path/command combination is valid.
+        """
+        # Check if its an absolute path
+        if not path.is_absolute():
+            suggested_path = Path("") / path
+            raise ToolError(
+                f"The path {path} is not an absolute path, it should start with `/`. Maybe you meant {suggested_path}?"
+            )
+        # Check if path exists
+        if not path.exists() and command != "create":
+            raise ToolError(
+                f"The path {path} does not exist. Please provide a valid path."
+            )
+        if path.exists() and command == "create":
+            raise ToolError(
+                f"File already exists at: {path}. Cannot overwrite files using command `create`."
+            )
+        # Check if the path points to a directory
+        if path.is_dir():
+            if command != "view":
+                raise ToolError(
+                    f"The path {path} is a directory and only the `view` command can be used on directories"
+                )
+
+    async def view(self, path: Path, view_range: list[int] | None = None):
+        """Implement the view command"""
+        if path.is_dir():
+            if view_range:
+                raise ToolError(
+                    "The `view_range` parameter is not allowed when `path` points to a directory."
+                )
+
+            _, stdout, stderr = await run(
+                rf"find {path} -maxdepth 2 -not -path '*/\.*'"
+            )
+            if not stderr:
+                stdout = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{stdout}\n"
+            return CLIResult(output=stdout, error=stderr)
+
+        file_content = self.read_file(path)
+        init_line = 1
+        if view_range:
+            if len(view_range) != 2 or not all(isinstance(i, int) for i in view_range):
+                raise ToolError(
+                    "Invalid `view_range`. It should be a list of two integers."
+                )
+            file_lines = file_content.split("\n")
+            n_lines_file = len(file_lines)
+            init_line, final_line = view_range
+            if init_line < 1 or init_line > n_lines_file:
+                raise ToolError(
+                    f"Invalid `view_range`: {view_range}. Its first element `{init_line}` should be within the range of lines of the file: {[1, n_lines_file]}"
+                )
+            if final_line > n_lines_file:
+                raise ToolError(
+                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be smaller than the number of lines in the file: `{n_lines_file}`"
+                )
+            if final_line != -1 and final_line < init_line:
+                raise ToolError(
+                    f"Invalid `view_range`: {view_range}. Its second element `{final_line}` should be larger or equal than its first `{init_line}`"
+                )
+
+            if final_line == -1:
+                file_content = "\n".join(file_lines[init_line - 1 :])
+            else:
+                file_content = "\n".join(file_lines[init_line - 1 : final_line])
+
+        return CLIResult(
+            output=self._make_output(file_content, str(path), init_line=init_line)
+        )
+
+    def str_replace(self, path: Path, old_str: str, new_str: str | None):
+        """Implement the str_replace command, which replaces old_str with new_str in the file content"""
+        # Read the file content
+        file_content = self.read_file(path).expandtabs()
+        old_str = old_str.expandtabs()
+        new_str = new_str.expandtabs() if new_str is not None else ""
+
+        # Check if old_str is unique in the file
+        occurrences = file_content.count(old_str)
+        if occurrences == 0:
+            raise ToolError(
+                f"No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}."
+            )
+        elif occurrences > 1:
+            file_content_lines = file_content.split("\n")
+            lines = [
+                idx + 1
+                for idx, line in enumerate(file_content_lines)
+                if old_str in line
+            ]
+            raise ToolError(
+                f"No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {lines}. Please ensure it is unique"
+            )
+
+        # Replace old_str with new_str
+        new_file_content = file_content.replace(old_str, new_str)
+
+        # Write the new content to the file
+        self.write_file(path, new_file_content)
+
+        # Save the content to history
+        self._file_history[path].append(file_content)
+
+        # Create a snippet of the edited section
+        replacement_line = file_content.split(old_str)[0].count("\n")
+        start_line = max(0, replacement_line - SNIPPET_LINES)
+        end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
+        snippet = "\n".join(new_file_content.split("\n")[start_line : end_line + 1])
+
+        # Prepare the success message
+        success_msg = f"The file {path} has been edited. "
+        success_msg += self._make_output(
+            snippet, f"a snippet of {path}", start_line + 1
+        )
+        success_msg += "Review the changes and make sure they are as expected. Edit the file again if necessary."
+
+        return CLIResult(output=success_msg)
+
+    def insert(self, path: Path, insert_line: int, new_str: str):
+        """Implement the insert command, which inserts new_str at the specified line in the file content."""
+        file_text = self.read_file(path).expandtabs()
+        new_str = new_str.expandtabs()
+        file_text_lines = file_text.split("\n")
+        n_lines_file = len(file_text_lines)
+
+        if insert_line < 0 or insert_line > n_lines_file:
+            raise ToolError(
+                f"Invalid `insert_line` parameter: {insert_line}. It should be within the range of lines of the file: {[0, n_lines_file]}"
+            )
+
+        new_str_lines = new_str.split("\n")
+        new_file_text_lines = (
+            file_text_lines[:insert_line]
+            + new_str_lines
+            + file_text_lines[insert_line:]
+        )
+        snippet_lines = (
+            file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
+            + new_str_lines
+            + file_text_lines[insert_line : insert_line + SNIPPET_LINES]
+        )
+
+        new_file_text = "\n".join(new_file_text_lines)
+        snippet = "\n".join(snippet_lines)
+
+        self.write_file(path, new_file_text)
+        self._file_history[path].append(file_text)
+
+        success_msg = f"The file {path} has been edited. "
+        success_msg += self._make_output(
+            snippet,
+            "a snippet of the edited file",
+            max(1, insert_line - SNIPPET_LINES + 1),
+        )
+        success_msg += "Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc). Edit the file again if necessary."
+        return CLIResult(output=success_msg)
+
+    def undo_edit(self, path: Path):
+        """Implement the undo_edit command."""
+        if not self._file_history[path]:
+            raise ToolError(f"No edit history found for {path}.")
+
+        old_text = self._file_history[path].pop()
+        self.write_file(path, old_text)
+
+        return CLIResult(
+            output=f"Last edit to {path} undone successfully. {self._make_output(old_text, str(path))}"
+        )
+
+    def read_file(self, path: Path):
+        """Read the content of a file from a given path; raise a ToolError if an error occurs."""
+        try:
+            return path.read_text()
+        except Exception as e:
+            raise ToolError(f"Ran into {e} while trying to read {path}") from None
+
+    def write_file(self, path: Path, file: str):
+        """Write the content of a file to a given path; raise a ToolError if an error occurs."""
+        try:
+            path.write_text(file)
+        except Exception as e:
+            raise ToolError(f"Ran into {e} while trying to write to {path}") from None
+
+    def _make_output(
+        self,
+        file_content: str,
+        file_descriptor: str,
+        init_line: int = 1,
+        expand_tabs: bool = True,
+    ):
+        """Generate output for the CLI based on the content of a file."""
+        file_content = maybe_truncate(file_content)
+        if expand_tabs:
+            file_content = file_content.expandtabs()
+        file_content = "\n".join(
+            [
+                f"{i + init_line:6}\t{line}"
+                for i, line in enumerate(file_content.split("\n"))
+            ]
+        )
+        return (
+            f"Here's the result of running `cat -n` on {file_descriptor}:\n"
+            + file_content
+            + "\n"
+        )
--- a/app/tool/terminate.py
+++ b/app/tool/terminate.py
@ -0,0 +1,24 @@
+from app.tool.base import BaseTool
+
+
+_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task."""
+
+
+class Terminate(BaseTool):
+    name: str = "terminate"
+    description: str = _TERMINATE_DESCRIPTION
+    parameters: dict = {
+        "type": "object",
+        "properties": {
+            "status": {
+                "type": "string",
+                "description": "The finish status of the interaction.",
+                "enum": ["success", "failure"],
+            }
+        },
+        "required": ["status"],
+    }
+
+    async def execute(self, status: str) -> str:
+        """Finish the current execution"""
+        return f"The interaction has been completed with status: {status}"
--- a/app/tool/tool_collection.py
+++ b/app/tool/tool_collection.py
@ -0,0 +1,55 @@
+"""Collection classes for managing multiple tools."""
+from typing import Any, Dict, List
+
+from app.exceptions import ToolError
+from app.tool.base import BaseTool, ToolFailure, ToolResult
+
+
+class ToolCollection:
+    """A collection of defined tools."""
+
+    def __init__(self, *tools: BaseTool):
+        self.tools = tools
+        self.tool_map = {tool.name: tool for tool in tools}
+
+    def __iter__(self):
+        return iter(self.tools)
+
+    def to_params(self) -> List[Dict[str, Any]]:
+        return [tool.to_param() for tool in self.tools]
+
+    async def execute(
+        self, *, name: str, tool_input: Dict[str, Any] = None
+    ) -> ToolResult:
+        tool = self.tool_map.get(name)
+        if not tool:
+            return ToolFailure(error=f"Tool {name} is invalid")
+        try:
+            result = await tool(**tool_input)
+            return result
+        except ToolError as e:
+            return ToolFailure(error=e.message)
+
+    async def execute_all(self) -> List[ToolResult]:
+        """Execute all tools in the collection sequentially."""
+        results = []
+        for tool in self.tools:
+            try:
+                result = await tool()
+                results.append(result)
+            except ToolError as e:
+                results.append(ToolFailure(error=e.message))
+        return results
+
+    def get_tool(self, name: str) -> BaseTool:
+        return self.tool_map.get(name)
+
+    def add_tool(self, tool: BaseTool):
+        self.tools += (tool,)
+        self.tool_map[tool.name] = tool
+        return self
+
+    def add_tools(self, *tools: BaseTool):
+        for tool in tools:
+            self.add_tool(tool)
+        return self
--- a/app/utils/init.py
+++ b/app/utils/init.py
--- a/app/utils/extract_html_content.py
+++ b/app/utils/extract_html_content.py
@ -0,0 +1,112 @@
+import re
+
+
+def extract_html_content(text: str, stack: str = "react-tailwind") -> str:
+    """
+    Extract code content from LLM response based on technology stack.
+
+    Args:
+        text: The raw text response from LLM
+        stack: Technology stack ("react-tailwind", "html-tailwind", "svg")
+
+    Returns:
+        str: Extracted code content
+    """
+    # Remove markdown code blocks if present
+    text = re.sub(r"```[\w]*\n|```", "", text)
+
+    if stack == "svg":
+        # Extract SVG content
+        svg_match = re.search(r"(<svg.*?>.*?</svg>)", text, re.DOTALL)
+        if svg_match:
+            return svg_match.group(1)
+    elif stack == "react-tailwind":
+        # Extract React component content
+        react_match = re.search(r"(export default function.*?})\s*$", text, re.DOTALL)
+        if react_match:
+            return react_match.group(1)
+        # Alternative: look for const/function component definition
+        alt_match = re.search(
+            r"((?:const|function)\s+\w+\s*=?\s*(?:\([^)]*\))?\s*=>?\s*{.*?})\s*$",
+            text,
+            re.DOTALL,
+        )
+        if alt_match:
+            return alt_match.group(1)
+
+    # Default: try to extract content within <html> tags
+    html_match = re.search(r"(<html.*?>.*?</html>)", text, re.DOTALL)
+    if html_match:
+        return html_match.group(1)
+
+    # If no specific patterns match, try to extract any HTML-like content
+    body_match = re.search(r"(<body.*?>.*?</body>)", text, re.DOTALL)
+    if body_match:
+        return f"<html>\n{body_match.group(1)}\n</html>"
+
+    div_match = re.search(r"(<div.*?>.*?</div>)", text, re.DOTALL)
+    if div_match:
+        return f"<html>\n<body>\n{div_match.group(1)}\n</body>\n</html>"
+
+    # If no patterns match, clean up the text and return it
+    cleaned_text = text.strip()
+    print(
+        f"[Code Extraction] No specific pattern found for stack '{stack}'. Raw content:\n{cleaned_text}"
+    )
+    return cleaned_text
+
+
+def clean_code_content(code: str) -> str:
+    """
+    Clean and format the extracted code content.
+
+    Args:
+        code: Raw code content
+
+    Returns:
+        str: Cleaned and formatted code
+    """
+    # Remove leading/trailing whitespace
+    code = code.strip()
+
+    # Remove extra blank lines
+    code = re.sub(r"\n\s*\n", "\n\n", code)
+
+    # Ensure proper indentation
+    lines = code.split("\n")
+    indent_level = 0
+    formatted_lines = []
+
+    for line in lines:
+        # Adjust indent level based on brackets/braces
+        stripped_line = line.strip()
+        if stripped_line.endswith("{"):
+            formatted_lines.append("  " * indent_level + stripped_line)
+            indent_level += 1
+        elif stripped_line.startswith("}"):
+            indent_level = max(0, indent_level - 1)
+            formatted_lines.append("  " * indent_level + stripped_line)
+        else:
+            formatted_lines.append("  " * indent_level + stripped_line)
+
+    return "\n".join(formatted_lines)
+
+
+def extract_code_content(text: str, stack: str = "react-tailwind") -> str:
+    """
+    Main function to extract and clean code content.
+
+    Args:
+        text: Raw text from LLM response
+        stack: Technology stack being used
+
+    Returns:
+        str: Final cleaned and formatted code
+    """
+    # Extract the relevant code content
+    extracted_content = extract_html_content(text, stack)
+
+    # Clean and format the code
+    cleaned_content = clean_code_content(extracted_content)
+
+    return cleaned_content
--- a/app/utils/shutdown_listener.py
+++ b/app/utils/shutdown_listener.py
@ -0,0 +1,74 @@
+"""
+This module monitors the app for shutdown signals
+"""
+
+import asyncio
+import signal
+import threading
+import time
+from types import FrameType
+
+from uvicorn.server import HANDLED_SIGNALS
+
+from app.logger import logger
+
+
+_should_exit = None
+
+
+def _register_signal_handler(sig: signal.Signals):
+    original_handler = None
+
+    def handler(sig_: int, frame: FrameType | None):
+        logger.debug(f"shutdown_signal:{sig_}")
+        global _should_exit
+        _should_exit = True
+        if original_handler:
+            original_handler(sig_, frame)  # type: ignore[unreachable]
+
+    original_handler = signal.signal(sig, handler)
+
+
+def _register_signal_handlers():
+    global _should_exit
+    if _should_exit is not None:
+        return
+    _should_exit = False
+
+    logger.debug("_register_signal_handlers")
+
+    # Check if we're in the main thread of the main interpreter
+    if threading.current_thread() is threading.main_thread():
+        logger.debug("_register_signal_handlers:main_thread")
+        for sig in HANDLED_SIGNALS:
+            _register_signal_handler(sig)
+    else:
+        logger.debug("_register_signal_handlers:not_main_thread")
+
+
+def should_exit() -> bool:
+    _register_signal_handlers()
+    return bool(_should_exit)
+
+
+def should_continue() -> bool:
+    _register_signal_handlers()
+    return not _should_exit
+
+
+def sleep_if_should_continue(timeout: float):
+    if timeout <= 1:
+        time.sleep(timeout)
+        return
+    start_time = time.time()
+    while (time.time() - start_time) < timeout and should_continue():
+        time.sleep(1)
+
+
+async def async_sleep_if_should_continue(timeout: float):
+    if timeout <= 1:
+        await asyncio.sleep(timeout)
+        return
+    start_time = time.time()
+    while time.time() - start_time < timeout and should_continue():
+        await asyncio.sleep(1)
--- a/config/config.example.toml
+++ b/config/config.example.toml
@ -0,0 +1,13 @@
+# Global LLM configuration
+[llm]
+model = "deepseek-chat"
+base_url = "https://api.deepseek.com/v1"
+api_key = "sk-..."
+max_tokens = 4096
+temperature = 0.0
+
+# Optional configuration for specific LLM models
+[llm.vision]
+model = "..."
+base_url = "..."
+api_key = "sk-..."
--- a/main.py
+++ b/main.py
@ -0,0 +1,23 @@
+import asyncio
+
+from app.agent import ToolCallAgent
+from app.logger import logger
+
+
+async def main():
+    agent = ToolCallAgent()
+    while True:
+        try:
+            prompt = input("Enter your prompt (or 'exit' to quit): ")
+            if prompt.lower() == "exit":
+                logger.info("Goodbye!")
+                break
+            logger.warning("Processing your request...")
+            await agent.run(prompt)
+        except KeyboardInterrupt:
+            logger.warning("Goodbye!")
+            break
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,14 @@
+pydantic~=2.10.4
+openai~=1.58.1
+tenacity~=9.0.0
+pyyaml~=6.0.2
+loguru~=0.7.3
+numpy
+datasets~=3.2.0
+
+html2text~=2024.2.26
+gymnasium~=1.0.0
+pillow~=10.4.0
+browsergym~=0.13.3
+uvicorn~=0.34.0
+unidiff~=0.7.5
--- a/run_loop.py
+++ b/run_loop.py
@ -0,0 +1,19 @@
+import asyncio
+
+from app.agent import ToolCallAgent
+from app.flow.base import FlowType
+from app.flow.flow_factory import FlowFactory
+
+
+if __name__ == "__main__":
+    agent = ToolCallAgent()
+
+    flow = FlowFactory.create_flow(
+        flow_type=FlowType.PLANNING,
+        agents=agent,
+    )
+
+    result = asyncio.run(
+        flow.execute("Create a web app that shows Japan travel destinations")
+    )
+    print(result)