Merge remote-tracking branch 'upstream/main' into sandbox
This commit is contained in:
commit
4df605e8db
4
.github/ISSUE_TEMPLATE/config.yaml
vendored
Normal file
4
.github/ISSUE_TEMPLATE/config.yaml
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: "📑 Read online docs"
|
||||
about: Find tutorials, use cases, and guides in the OpenManus documentation.
|
14
.github/ISSUE_TEMPLATE/request_new_features.md
vendored
Normal file
14
.github/ISSUE_TEMPLATE/request_new_features.md
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
---
|
||||
name: "🤔 Request new features"
|
||||
about: Suggest ideas or features you’d like to see implemented in OpenManus.
|
||||
title: ''
|
||||
labels: kind/features
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Feature description**
|
||||
<!-- Provide a clear and concise description of the proposed feature -->
|
||||
|
||||
**Your Feature**
|
||||
<!-- Explain your idea or implementation process. Optionally, include a Pull Request URL. -->
|
||||
<!-- Ensure accompanying docs/tests/examples are provided for review. -->
|
25
.github/ISSUE_TEMPLATE/show_me_the_bug.md
vendored
Normal file
25
.github/ISSUE_TEMPLATE/show_me_the_bug.md
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
---
|
||||
name: "🪲 Show me the Bug"
|
||||
about: Report a bug encountered while using OpenManus and seek assistance.
|
||||
title: ''
|
||||
labels: kind/bug
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Bug description**
|
||||
<!-- Clearly describe the bug you encountered -->
|
||||
|
||||
**Bug solved method**
|
||||
<!-- If resolved, explain the solution. Optionally, include a Pull Request URL. -->
|
||||
<!-- If unresolved, provide additional details to aid investigation -->
|
||||
|
||||
**Environment information**
|
||||
<!-- System: e.g., Ubuntu 22.04, Python: e.g., 3.12, OpenManus version: e.g., 0.1.0 -->
|
||||
|
||||
- System version:
|
||||
- Python version:
|
||||
- OpenManus version or branch:
|
||||
- Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`):
|
||||
|
||||
**Screenshots or logs**
|
||||
<!-- Attach screenshots or logs to help diagnose the issue -->
|
17
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
17
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
**Features**
|
||||
<!-- Describe the features or bug fixes in this PR. For bug fixes, link to the issue. -->
|
||||
|
||||
- Feature 1
|
||||
- Feature 2
|
||||
|
||||
**Feature Docs**
|
||||
<!-- Provide RFC, tutorial, or use case links for significant updates. Optional for minor changes. -->
|
||||
|
||||
**Influence**
|
||||
<!-- Explain the impact of these changes for reviewer focus. -->
|
||||
|
||||
**Result**
|
||||
<!-- Include screenshots or logs of unit tests or running results. -->
|
||||
|
||||
**Other**
|
||||
<!-- Additional notes about this PR. -->
|
58
.github/dependabot.yml
vendored
Normal file
58
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
open-pull-requests-limit: 4
|
||||
groups:
|
||||
# Group critical packages that might need careful review
|
||||
core-dependencies:
|
||||
patterns:
|
||||
- "pydantic*"
|
||||
- "openai"
|
||||
- "fastapi"
|
||||
- "tiktoken"
|
||||
browsergym-related:
|
||||
patterns:
|
||||
- "browsergym*"
|
||||
- "browser-use"
|
||||
- "playwright"
|
||||
search-tools:
|
||||
patterns:
|
||||
- "googlesearch-python"
|
||||
- "baidusearch"
|
||||
- "duckduckgo_search"
|
||||
pre-commit:
|
||||
patterns:
|
||||
- "pre-commit"
|
||||
security-all:
|
||||
applies-to: "security-updates"
|
||||
patterns:
|
||||
- "*"
|
||||
version-all:
|
||||
applies-to: "version-updates"
|
||||
patterns:
|
||||
- "*"
|
||||
exclude-patterns:
|
||||
- "pydantic*"
|
||||
- "openai"
|
||||
- "fastapi"
|
||||
- "tiktoken"
|
||||
- "browsergym*"
|
||||
- "browser-use"
|
||||
- "playwright"
|
||||
- "googlesearch-python"
|
||||
- "baidusearch"
|
||||
- "duckduckgo_search"
|
||||
- "pre-commit"
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
open-pull-requests-limit: 4
|
||||
groups:
|
||||
actions:
|
||||
patterns:
|
||||
- "*"
|
33
.github/workflows/build-package.yaml
vendored
Normal file
33
.github/workflows/build-package.yaml
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
name: Build and upload Python package
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
release:
|
||||
types: [created, published]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: 'pip'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install setuptools wheel twine
|
||||
- name: Set package version
|
||||
run: |
|
||||
export VERSION="${GITHUB_REF#refs/tags/v}"
|
||||
sed -i "s/version=.*/version=\"${VERSION}\",/" setup.py
|
||||
- name: Build and publish
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: |
|
||||
python setup.py bdist_wheel sdist
|
||||
twine upload dist/*
|
33
.github/workflows/environment-corrupt-check.yaml
vendored
Normal file
33
.github/workflows/environment-corrupt-check.yaml
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
name: Environment Corruption Check
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
paths:
|
||||
- requirements.txt
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
paths:
|
||||
- requirements.txt
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
test-python-versions:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.11.11", "3.12.8", "3.13.2"]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
127
.github/workflows/pr-autodiff.yaml
vendored
Normal file
127
.github/workflows/pr-autodiff.yaml
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
name: PR Diff Summarization
|
||||
on:
|
||||
# pull_request:
|
||||
# branches: [main]
|
||||
# types: [opened, ready_for_review, reopened]
|
||||
issue_comment:
|
||||
types: [created]
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
jobs:
|
||||
pr-diff-summarization:
|
||||
runs-on: ubuntu-latest
|
||||
if: |
|
||||
(github.event_name == 'pull_request') ||
|
||||
(github.event_name == 'issue_comment' &&
|
||||
contains(github.event.comment.body, '!pr-diff') &&
|
||||
(github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') &&
|
||||
github.event.issue.pull_request)
|
||||
steps:
|
||||
- name: Get PR head SHA
|
||||
id: get-pr-sha
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ]; then
|
||||
echo "pr_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT
|
||||
echo "Retrieved PR head SHA: ${{ github.event.pull_request.head.sha }}"
|
||||
else
|
||||
PR_URL="${{ github.event.issue.pull_request.url }}"
|
||||
SHA=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL | jq -r '.head.sha')
|
||||
echo "pr_sha=$SHA" >> $GITHUB_OUTPUT
|
||||
echo "Retrieved PR head SHA from API: $SHA"
|
||||
fi
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ steps.get-pr-sha.outputs.pr_sha }}
|
||||
fetch-depth: 0
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install openai requests
|
||||
- name: Create and run Python script
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
|
||||
run: |-
|
||||
cat << 'EOF' > /tmp/_workflow_core.py
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
|
||||
def get_diff():
|
||||
result = subprocess.run(
|
||||
['git', 'diff', 'origin/main...HEAD'],
|
||||
capture_output=True, text=True, check=True)
|
||||
return '\n'.join(
|
||||
line for line in result.stdout.split('\n')
|
||||
if any(line.startswith(c) for c in ('+', '-'))
|
||||
and not line.startswith(('---', '+++'))
|
||||
)[:round(200000 * 0.4)] # Truncate to prevent overflow
|
||||
|
||||
def generate_comment(diff_content):
|
||||
client = OpenAI(
|
||||
base_url=os.getenv("OPENAI_BASE_URL"),
|
||||
api_key=os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
|
||||
guidelines = '''
|
||||
1. English version first, Chinese Simplified version after
|
||||
2. Example format:
|
||||
# Diff Report
|
||||
## English
|
||||
- Added `ABC` class
|
||||
- Fixed `f()` behavior in `foo` module
|
||||
|
||||
### Comments Highlight
|
||||
- `config.toml` needs to be configured properly to make sure new features work as expected.
|
||||
|
||||
### Spelling/Offensive Content Check
|
||||
- No spelling mistakes or offensive content found in the code or comments.
|
||||
3. Highlight non-English comments
|
||||
4. Check for spelling/offensive content'''
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="o3-mini",
|
||||
messages=[{
|
||||
"role": "system",
|
||||
"content": "Generate bilingual code review feedback."
|
||||
}, {
|
||||
"role": "user",
|
||||
"content": f"Review these changes per guidelines:\n{guidelines}\n\nDIFF:\n{diff_content}"
|
||||
}]
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
|
||||
def post_comment(comment):
|
||||
repo = os.getenv("GITHUB_REPOSITORY")
|
||||
pr_number = os.getenv("PR_NUMBER")
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {os.getenv('GH_TOKEN')}",
|
||||
"Accept": "application/vnd.github.v3+json"
|
||||
}
|
||||
url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
|
||||
|
||||
requests.post(url, json={"body": comment}, headers=headers)
|
||||
|
||||
if __name__ == "__main__":
|
||||
diff_content = get_diff()
|
||||
if not diff_content.strip():
|
||||
print("No meaningful diff detected.")
|
||||
exit(0)
|
||||
|
||||
comment = generate_comment(diff_content)
|
||||
post_comment(comment)
|
||||
print("Comment posted successfully.")
|
||||
EOF
|
||||
|
||||
python /tmp/_workflow_core.py
|
26
.github/workflows/pre-commit.yaml
vendored
Normal file
26
.github/workflows/pre-commit.yaml
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
name: Pre-commit checks
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- '**'
|
||||
push:
|
||||
branches:
|
||||
- '**'
|
||||
|
||||
jobs:
|
||||
pre-commit-check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout Source Code
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
- name: Install pre-commit and tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pre-commit black==23.1.0 isort==5.12.0 autoflake==2.0.1
|
||||
- name: Run pre-commit hooks
|
||||
run: pre-commit run --all-files
|
23
.github/workflows/stale.yaml
vendored
Normal file
23
.github/workflows/stale.yaml
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
name: Close inactive issues
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "5 0 * * *"
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
with:
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "inactive"
|
||||
stale-issue-message: "This issue has been inactive for 30 days. Please comment if you have updates."
|
||||
close-issue-message: "This issue was closed due to 45 days of inactivity. Reopen if still relevant."
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
29
.github/workflows/top-issues.yaml
vendored
Normal file
29
.github/workflows/top-issues.yaml
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
name: Top issues
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0/2 * * *'
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
ShowAndLabelTopIssues:
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
actions: read
|
||||
contents: read
|
||||
name: Display and label top issues
|
||||
runs-on: ubuntu-latest
|
||||
if: github.repository == 'mannaandpoem/OpenManus'
|
||||
steps:
|
||||
- name: Run top issues action
|
||||
uses: rickstaa/top-issues-action@7e8dda5d5ae3087670f9094b9724a9a091fc3ba1 # v1.3.101
|
||||
env:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
label: true
|
||||
dashboard: true
|
||||
dashboard_show_total_reactions: true
|
||||
top_issues: true
|
||||
top_features: true
|
||||
top_bugs: true
|
||||
top_pull_requests: true
|
||||
top_list_size: 14
|
30
.gitignore
vendored
30
.gitignore
vendored
@ -1,3 +1,14 @@
|
||||
### Project-specific ###
|
||||
# Logs
|
||||
logs/
|
||||
|
||||
# Data
|
||||
data/
|
||||
|
||||
# Workspace
|
||||
workspace/
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@ -170,12 +181,19 @@ cython_debug/
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
### Visual Studio Code ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Data
|
||||
data/
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Workspace
|
||||
workspace/
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
# OSX
|
||||
.DS_Store
|
||||
|
@ -18,22 +18,22 @@ repos:
|
||||
- id: autoflake
|
||||
args: [
|
||||
--remove-all-unused-imports,
|
||||
--ignore-init-module-imports, # 忽略 __init__.py 中的导入
|
||||
--ignore-init-module-imports,
|
||||
--expand-star-imports,
|
||||
--remove-duplicate-keys,
|
||||
--remove-unused-variables,
|
||||
--recursive,
|
||||
--in-place,
|
||||
--exclude=__init__.py, # 排除 __init__.py 文件
|
||||
--exclude=__init__.py,
|
||||
]
|
||||
files: \.py$ # 只处理 Python 文件
|
||||
files: \.py$
|
||||
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
args: [
|
||||
"--profile", "black", # 使用 black 兼容的配置
|
||||
"--profile", "black",
|
||||
"--filter-files",
|
||||
"--lines-after-imports=2",
|
||||
]
|
||||
|
8
.vscode/extensions.json
vendored
Normal file
8
.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"tamasfe.even-better-toml",
|
||||
"ms-python.black-formatter",
|
||||
"ms-python.isort"
|
||||
],
|
||||
"unwantedRecommendations": []
|
||||
}
|
20
.vscode/settings.json
vendored
Normal file
20
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "always"
|
||||
}
|
||||
},
|
||||
"[toml]": {
|
||||
"editor.defaultFormatter": "tamasfe.even-better-toml",
|
||||
},
|
||||
"pre-commit-helper.runOnSave": "none",
|
||||
"pre-commit-helper.config": ".pre-commit-config.yaml",
|
||||
"evenBetterToml.schema.enabled": true,
|
||||
"evenBetterToml.schema.associations": {
|
||||
"^.+config[/\\\\].+\\.toml$": "../config/schema.config.json"
|
||||
},
|
||||
"files.insertFinalNewline": true,
|
||||
"files.trimTrailingWhitespace": true,
|
||||
"editor.formatOnSave": true
|
||||
}
|
162
CODE_OF_CONDUCT.md
Normal file
162
CODE_OF_CONDUCT.md
Normal file
@ -0,0 +1,162 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people.
|
||||
* Being respectful of differing opinions, viewpoints, and experiences.
|
||||
* Giving and gracefully accepting constructive feedback.
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience.
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community.
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind.
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks.
|
||||
* Public or private harassment.
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission.
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting.
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official email address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
mannaandpoem@gmail.com
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
### Slack and Discord Etiquettes
|
||||
|
||||
These Slack and Discord etiquette guidelines are designed to foster an inclusive, respectful, and productive environment
|
||||
for all community members. By following these best practices, we ensure effective communication and collaboration while
|
||||
minimizing disruptions. Let’s work together to build a supportive and welcoming community!
|
||||
|
||||
- Communicate respectfully and professionally, avoiding sarcasm or harsh language, and remember that tone can be
|
||||
difficult to interpret in text.
|
||||
- Use threads for specific discussions to keep channels organized and easier to follow.
|
||||
- Tag others only when their input is critical or urgent, and use @here, @channel or @everyone sparingly to minimize
|
||||
disruptions.
|
||||
- Be patient, as open-source contributors and maintainers often have other commitments and may need time to respond.
|
||||
- Post questions or discussions in the most relevant
|
||||
channel ([discord - #general](https://discord.com/channels/1125308739348594758/1138430348557025341)).
|
||||
- When asking for help or raising issues, include necessary details like links, screenshots, or clear explanations to
|
||||
provide context.
|
||||
- Keep discussions in public channels whenever possible to allow others to benefit from the conversation, unless the
|
||||
matter is sensitive or private.
|
||||
- Always adhere to [our standards](https://github.com/mannaandpoem/OpenManus/blob/main/CODE_OF_CONDUCT.md#our-standards)
|
||||
to ensure a welcoming and collaborative environment.
|
||||
- If you choose to mute a channel, consider setting up alerts for topics that still interest you to stay engaged. For
|
||||
Slack, Go to Settings → Notifications → My Keywords to add specific keywords that will notify you when mentioned. For
|
||||
example, if you're here for discussions about LLMs, mute the channel if it’s too busy, but set notifications to alert
|
||||
you only when “LLMs” appears in messages. Also for Discord, go to the channel notifications and choose the option that
|
||||
best describes your need.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
13
Dockerfile
Normal file
13
Dockerfile
Normal file
@ -0,0 +1,13 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app/OpenManus
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends git curl \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& (command -v uv >/dev/null 2>&1 || pip install --no-cache-dir uv)
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN uv pip install --system -r requirements.txt
|
||||
|
||||
CMD ["bash"]
|
88
README.md
88
README.md
@ -1,22 +1,36 @@
|
||||
<p align="left">
|
||||
<a href="README_zh.md">中文</a>  |  English 
|
||||
<p align="center">
|
||||
<img src="assets/logo.jpg" width="200"/>
|
||||
</p>
|
||||
|
||||
# OpenManus 🙋
|
||||
Manus is incredible, but OpenManus can achieve any ideas without an Invite Code 🛫!
|
||||
English | [中文](README_zh.md) | [한국어](README_ko.md) | [日本語](README_ja.md)
|
||||
|
||||
Our team members [@mannaandpoem](https://github.com/mannaandpoem) [@XiangJinyu](https://github.com/XiangJinyu) [@MoshiQAQ](https://github.com/MoshiQAQ) [@didiforgithub](https://github.com/didiforgithub) from [@MetaGPT](https://github.com/geekan/MetaGPT) built it within 3 hours!
|
||||
[](https://github.com/mannaandpoem/OpenManus/stargazers)
|
||||
 
|
||||
[](https://opensource.org/licenses/MIT)  
|
||||
[](https://discord.gg/DYn29wFk9z)
|
||||
|
||||
# 👋 OpenManus
|
||||
|
||||
Manus is incredible, but OpenManus can achieve any idea without an *Invite Code* 🛫!
|
||||
|
||||
Our team members [@Xinbin Liang](https://github.com/mannaandpoem) and [@Jinyu Xiang](https://github.com/XiangJinyu) (core authors), along with [@Zhaoyang Yu](https://github.com/MoshiQAQ), [@Jiayi Zhang](https://github.com/didiforgithub), and [@Sirui Hong](https://github.com/stellaHSR), we are from [@MetaGPT](https://github.com/geekan/MetaGPT). The prototype is launched within 3 hours and we are keeping building!
|
||||
|
||||
It's a simple implementation, so we welcome any suggestions, contributions, and feedback!
|
||||
|
||||
Enjoy your own agent with OpenManus!
|
||||
|
||||
We're also excited to introduce [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL), an open-source project dedicated to reinforcement learning (RL)- based (such as GRPO) tuning methods for LLM agents, developed collaboratively by researchers from UIUC and OpenManus.
|
||||
|
||||
## Project Demo
|
||||
|
||||
<video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
|
||||
|
||||
## Installation
|
||||
|
||||
We provide two installation methods. Method 2 (using uv) is recommended for faster installation and better dependency management.
|
||||
|
||||
### Method 1: Using conda
|
||||
|
||||
1. Create a new conda environment:
|
||||
|
||||
```bash
|
||||
@ -37,6 +51,36 @@ cd OpenManus
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Method 2: Using uv (Recommended)
|
||||
|
||||
1. Install uv (A fast Python package installer and resolver):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
2. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. Create a new virtual environment and activate it:
|
||||
|
||||
```bash
|
||||
uv venv --python 3.12
|
||||
source .venv/bin/activate # On Unix/macOS
|
||||
# Or on Windows:
|
||||
# .venv\Scripts\activate
|
||||
```
|
||||
|
||||
4. Install dependencies:
|
||||
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
OpenManus requires configuration for the LLM APIs it uses. Follow these steps to set up your configuration:
|
||||
@ -66,6 +110,7 @@ api_key = "sk-..." # Replace with your actual API key
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
One line for run OpenManus:
|
||||
|
||||
```bash
|
||||
@ -81,26 +126,41 @@ python run_flow.py
|
||||
```
|
||||
|
||||
## How to contribute
|
||||
|
||||
We welcome any friendly suggestions and helpful contributions! Just create issues or submit pull requests.
|
||||
|
||||
Or contact @mannaandpoem via 📧email: mannaandpoem@gmail.com
|
||||
|
||||
## Roadmap
|
||||
- [ ] Better Planning
|
||||
- [ ] Live Demos
|
||||
- [ ] Replay
|
||||
- [ ] RL Fine-tuned Models
|
||||
- [ ] Comprehensive Benchmarks
|
||||
**Note**: Before submitting a pull request, please use the pre-commit tool to check your changes. Run `pre-commit run --all-files` to execute the checks.
|
||||
|
||||
## Community Group
|
||||
Join our networking group and share your experience with other developers!
|
||||
Join our networking group on Feishu and share your experience with other developers!
|
||||
|
||||
<div align="center" style="display: flex; gap: 20px;">
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群5" width="300" />
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
|
||||
</div>
|
||||
|
||||
## Star History
|
||||
|
||||
[](https://star-history.com/#mannaandpoem/OpenManus&Date)
|
||||
|
||||
## Acknowledgement
|
||||
|
||||
Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) and [broswer-use](https://github.com/browser-use/browser-use) for providing basic support for this project!
|
||||
Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
|
||||
and [browser-use](https://github.com/browser-use/browser-use) for providing basic support for this project!
|
||||
|
||||
Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands) and [SWE-agent](https://github.com/SWE-agent/SWE-agent).
|
||||
|
||||
OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community!
|
||||
|
||||
## Cite
|
||||
```bibtex
|
||||
@misc{openmanus2025,
|
||||
author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong},
|
||||
title = {OpenManus: An open-source framework for building general AI agents},
|
||||
year = {2025},
|
||||
publisher = {GitHub},
|
||||
journal = {GitHub repository},
|
||||
howpublished = {\url{https://github.com/mannaandpoem/OpenManus}},
|
||||
}
|
||||
```
|
||||
|
165
README_ja.md
Normal file
165
README_ja.md
Normal file
@ -0,0 +1,165 @@
|
||||
<p align="center">
|
||||
<img src="assets/logo.jpg" width="200"/>
|
||||
</p>
|
||||
|
||||
[English](README.md) | [中文](README_zh.md) | [한국어](README_ko.md) | 日本語
|
||||
|
||||
[](https://github.com/mannaandpoem/OpenManus/stargazers)
|
||||
 
|
||||
[](https://opensource.org/licenses/MIT)  
|
||||
[](https://discord.gg/DYn29wFk9z)
|
||||
|
||||
# 👋 OpenManus
|
||||
|
||||
Manusは素晴らしいですが、OpenManusは*招待コード*なしでどんなアイデアも実現できます!🛫
|
||||
|
||||
私たちのチームメンバー [@Xinbin Liang](https://github.com/mannaandpoem) と [@Jinyu Xiang](https://github.com/XiangJinyu)(主要開発者)、そして [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub)、[@Sirui Hong](https://github.com/stellaHSR) は [@MetaGPT](https://github.com/geekan/MetaGPT) から来ました。プロトタイプは3時間以内に立ち上げられ、継続的に開発を進めています!
|
||||
|
||||
これはシンプルな実装ですので、どんな提案、貢献、フィードバックも歓迎します!
|
||||
|
||||
OpenManusで自分だけのエージェントを楽しみましょう!
|
||||
|
||||
また、UIUCとOpenManusの研究者が共同開発した[OpenManus-RL](https://github.com/OpenManus/OpenManus-RL)をご紹介できることを嬉しく思います。これは強化学習(RL)ベース(GRPOなど)のLLMエージェントチューニング手法に特化したオープンソースプロジェクトです。
|
||||
|
||||
## プロジェクトデモ
|
||||
|
||||
<video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
|
||||
|
||||
## インストール方法
|
||||
|
||||
インストール方法は2つ提供しています。方法2(uvを使用)は、より高速なインストールと優れた依存関係管理のため推奨されています。
|
||||
|
||||
### 方法1:condaを使用
|
||||
|
||||
1. 新しいconda環境を作成します:
|
||||
|
||||
```bash
|
||||
conda create -n open_manus python=3.12
|
||||
conda activate open_manus
|
||||
```
|
||||
|
||||
2. リポジトリをクローンします:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. 依存関係をインストールします:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 方法2:uvを使用(推奨)
|
||||
|
||||
1. uv(高速なPythonパッケージインストーラーと管理機能)をインストールします:
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
2. リポジトリをクローンします:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. 新しい仮想環境を作成してアクティベートします:
|
||||
|
||||
```bash
|
||||
uv venv --python 3.12
|
||||
source .venv/bin/activate # Unix/macOSの場合
|
||||
# Windowsの場合:
|
||||
# .venv\Scripts\activate
|
||||
```
|
||||
|
||||
4. 依存関係をインストールします:
|
||||
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 設定
|
||||
|
||||
OpenManusを使用するには、LLM APIの設定が必要です。以下の手順に従って設定してください:
|
||||
|
||||
1. `config`ディレクトリに`config.toml`ファイルを作成します(サンプルからコピーできます):
|
||||
|
||||
```bash
|
||||
cp config/config.example.toml config/config.toml
|
||||
```
|
||||
|
||||
2. `config/config.toml`を編集してAPIキーを追加し、設定をカスタマイズします:
|
||||
|
||||
```toml
|
||||
# グローバルLLM設定
|
||||
[llm]
|
||||
model = "gpt-4o"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..." # 実際のAPIキーに置き換えてください
|
||||
max_tokens = 4096
|
||||
temperature = 0.0
|
||||
|
||||
# 特定のLLMモデル用のオプション設定
|
||||
[llm.vision]
|
||||
model = "gpt-4o"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..." # 実際のAPIキーに置き換えてください
|
||||
```
|
||||
|
||||
## クイックスタート
|
||||
|
||||
OpenManusを実行する一行コマンド:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
その後、ターミナルからプロンプトを入力してください!
|
||||
|
||||
開発中バージョンを試すには、以下を実行します:
|
||||
|
||||
```bash
|
||||
python run_flow.py
|
||||
```
|
||||
|
||||
## 貢献方法
|
||||
|
||||
我々は建設的な意見や有益な貢献を歓迎します!issueを作成するか、プルリクエストを提出してください。
|
||||
|
||||
または @mannaandpoem に📧メールでご連絡ください:mannaandpoem@gmail.com
|
||||
|
||||
**注意**: プルリクエストを送信する前に、pre-commitツールを使用して変更を確認してください。`pre-commit run --all-files`を実行してチェックを実行します。
|
||||
|
||||
## コミュニティグループ
|
||||
Feishuのネットワーキンググループに参加して、他の開発者と経験を共有しましょう!
|
||||
|
||||
<div align="center" style="display: flex; gap: 20px;">
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
|
||||
</div>
|
||||
|
||||
## スター履歴
|
||||
|
||||
[](https://star-history.com/#mannaandpoem/OpenManus&Date)
|
||||
|
||||
## 謝辞
|
||||
|
||||
このプロジェクトの基本的なサポートを提供してくれた[anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
|
||||
と[browser-use](https://github.com/browser-use/browser-use)に感謝します!
|
||||
|
||||
さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)、[SWE-agent](https://github.com/SWE-agent/SWE-agent)にも感謝します。
|
||||
|
||||
OpenManusはMetaGPTのコントリビューターによって構築されました。このエージェントコミュニティに大きな感謝を!
|
||||
|
||||
## 引用
|
||||
```bibtex
|
||||
@misc{openmanus2025,
|
||||
author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong},
|
||||
title = {OpenManus: An open-source framework for building general AI agents},
|
||||
year = {2025},
|
||||
publisher = {GitHub},
|
||||
journal = {GitHub repository},
|
||||
howpublished = {\url{https://github.com/mannaandpoem/OpenManus}},
|
||||
}
|
166
README_ko.md
Normal file
166
README_ko.md
Normal file
@ -0,0 +1,166 @@
|
||||
<p align="center">
|
||||
<img src="assets/logo.jpg" width="200"/>
|
||||
</p>
|
||||
|
||||
[English](README.md) | [中文](README_zh.md) | 한국어 | [日本語](README_ja.md)
|
||||
|
||||
[](https://github.com/mannaandpoem/OpenManus/stargazers)
|
||||
 
|
||||
[](https://opensource.org/licenses/MIT)  
|
||||
[](https://discord.gg/DYn29wFk9z)
|
||||
|
||||
# 👋 OpenManus
|
||||
|
||||
Manus는 놀라운 도구지만, OpenManus는 *초대 코드* 없이도 모든 아이디어를 실현할 수 있습니다! 🛫
|
||||
|
||||
우리 팀의 멤버인 [@Xinbin Liang](https://github.com/mannaandpoem)와 [@Jinyu Xiang](https://github.com/XiangJinyu) (핵심 작성자), 그리고 [@Zhaoyang Yu](https://github.com/MoshiQAQ), [@Jiayi Zhang](https://github.com/didiforgithub), [@Sirui Hong](https://github.com/stellaHSR)이 함께 했습니다. 우리는 [@MetaGPT](https://github.com/geekan/MetaGPT)로부터 왔습니다. 프로토타입은 단 3시간 만에 출시되었으며, 계속해서 발전하고 있습니다!
|
||||
|
||||
이 프로젝트는 간단한 구현에서 시작되었으며, 여러분의 제안, 기여 및 피드백을 환영합니다!
|
||||
|
||||
OpenManus를 통해 여러분만의 에이전트를 즐겨보세요!
|
||||
|
||||
또한 [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL)을 소개하게 되어 기쁩니다. OpenManus와 UIUC 연구자들이 공동 개발한 이 오픈소스 프로젝트는 LLM 에이전트에 대해 강화 학습(RL) 기반 (예: GRPO) 튜닝 방법을 제공합니다.
|
||||
|
||||
## 프로젝트 데모
|
||||
|
||||
<video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
|
||||
|
||||
## 설치 방법
|
||||
|
||||
두 가지 설치 방법을 제공합니다. **방법 2 (uv 사용)** 이 더 빠른 설치와 효율적인 종속성 관리를 위해 권장됩니다.
|
||||
|
||||
### 방법 1: conda 사용
|
||||
|
||||
1. 새로운 conda 환경을 생성합니다:
|
||||
|
||||
```bash
|
||||
conda create -n open_manus python=3.12
|
||||
conda activate open_manus
|
||||
```
|
||||
|
||||
2. 저장소를 클론합니다:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. 종속성을 설치합니다:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 방법 2: uv 사용 (권장)
|
||||
|
||||
1. uv를 설치합니다. (빠른 Python 패키지 설치 및 종속성 관리 도구):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
2. 저장소를 클론합니다:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. 새로운 가상 환경을 생성하고 활성화합니다:
|
||||
|
||||
```bash
|
||||
uv venv --python 3.12
|
||||
source .venv/bin/activate # Unix/macOS의 경우
|
||||
# Windows의 경우:
|
||||
# .venv\Scripts\activate
|
||||
```
|
||||
|
||||
4. 종속성을 설치합니다:
|
||||
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 설정 방법
|
||||
|
||||
OpenManus를 사용하려면 사용하는 LLM API에 대한 설정이 필요합니다. 아래 단계를 따라 설정을 완료하세요:
|
||||
|
||||
1. `config` 디렉토리에 `config.toml` 파일을 생성하세요 (예제 파일을 복사하여 사용할 수 있습니다):
|
||||
|
||||
```bash
|
||||
cp config/config.example.toml config/config.toml
|
||||
```
|
||||
|
||||
2. `config/config.toml` 파일을 편집하여 API 키를 추가하고 설정을 커스터마이징하세요:
|
||||
|
||||
```toml
|
||||
# 전역 LLM 설정
|
||||
[llm]
|
||||
model = "gpt-4o"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..." # 실제 API 키로 변경하세요
|
||||
max_tokens = 4096
|
||||
temperature = 0.0
|
||||
|
||||
# 특정 LLM 모델에 대한 선택적 설정
|
||||
[llm.vision]
|
||||
model = "gpt-4o"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..." # 실제 API 키로 변경하세요
|
||||
```
|
||||
|
||||
## 빠른 시작
|
||||
|
||||
OpenManus를 실행하는 한 줄 명령어:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
이후 터미널에서 아이디어를 작성하세요!
|
||||
|
||||
unstable 버전을 실행하려면 아래 명령어를 사용할 수도 있습니다:
|
||||
|
||||
```bash
|
||||
python run_flow.py
|
||||
```
|
||||
|
||||
## 기여 방법
|
||||
|
||||
모든 친절한 제안과 유용한 기여를 환영합니다! 이슈를 생성하거나 풀 리퀘스트를 제출해 주세요.
|
||||
|
||||
또는 📧 메일로 연락주세요. @mannaandpoem : mannaandpoem@gmail.com
|
||||
|
||||
**참고**: pull request를 제출하기 전에 pre-commit 도구를 사용하여 변경 사항을 확인하십시오. `pre-commit run --all-files`를 실행하여 검사를 실행합니다.
|
||||
|
||||
## 커뮤니티 그룹
|
||||
Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공유하세요!
|
||||
|
||||
<div align="center" style="display: flex; gap: 20px;">
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
|
||||
</div>
|
||||
|
||||
## Star History
|
||||
|
||||
[](https://star-history.com/#mannaandpoem/OpenManus&Date)
|
||||
|
||||
## 감사의 글
|
||||
|
||||
이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와
|
||||
[browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다!
|
||||
|
||||
또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands), [SWE-agent](https://github.com/SWE-agent/SWE-agent)에 깊은 감사를 드립니다.
|
||||
|
||||
OpenManus는 MetaGPT 기여자들에 의해 개발되었습니다. 이 에이전트 커뮤니티에 깊은 감사를 전합니다!
|
||||
|
||||
## 인용
|
||||
```bibtex
|
||||
@misc{openmanus2025,
|
||||
author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong},
|
||||
title = {OpenManus: An open-source framework for building general AI agents},
|
||||
year = {2025},
|
||||
publisher = {GitHub},
|
||||
journal = {GitHub repository},
|
||||
howpublished = {\url{https://github.com/mannaandpoem/OpenManus}},
|
||||
}
|
||||
```
|
94
README_zh.md
94
README_zh.md
@ -1,23 +1,37 @@
|
||||
<p align="left">
|
||||
中文  |  <a href="README.md">English</a> 
|
||||
<p align="center">
|
||||
<img src="assets/logo.jpg" width="200"/>
|
||||
</p>
|
||||
|
||||
# OpenManus 🙋
|
||||
[English](README.md) | 中文 | [한국어](README_ko.md) | [日本語](README_ja.md)
|
||||
|
||||
[](https://github.com/mannaandpoem/OpenManus/stargazers)
|
||||
 
|
||||
[](https://opensource.org/licenses/MIT)  
|
||||
[](https://discord.gg/DYn29wFk9z)
|
||||
|
||||
# 👋 OpenManus
|
||||
|
||||
Manus 非常棒,但 OpenManus 无需邀请码即可实现任何创意 🛫!
|
||||
|
||||
我们来自 [@MetaGPT](https://github.com/geekan/MetaGPT) 的团队成员 [@mannaandpoem](https://github.com/mannaandpoem) [@XiangJinyu](https://github.com/XiangJinyu) [@MoshiQAQ](https://github.com/MoshiQAQ) [@didiforgithub](https://github.com/didiforgithub) 在 3 小时内完成了开发!
|
||||
我们的团队成员 [@Xinbin Liang](https://github.com/mannaandpoem) 和 [@Jinyu Xiang](https://github.com/XiangJinyu)(核心作者),以及 [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub) 和 [@Sirui Hong](https://github.com/stellaHSR),来自 [@MetaGPT](https://github.com/geekan/MetaGPT)团队。我们在 3
|
||||
小时内完成了开发并持续迭代中!
|
||||
|
||||
这是一个简洁的实现方案,欢迎任何建议、贡献和反馈!
|
||||
|
||||
用 OpenManus 开启你的智能体之旅吧!
|
||||
|
||||
## 项目演示
|
||||
我们也非常高兴地向大家介绍 [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL),这是一个专注于基于强化学习(RL,例如 GRPO)的方法来优化大语言模型(LLM)智能体的开源项目,由来自UIUC 和 OpenManus 的研究人员合作开发。
|
||||
|
||||
## 项目演示
|
||||
|
||||
<video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
|
||||
|
||||
## 安装指南
|
||||
|
||||
我们提供两种安装方式。推荐使用方式二(uv),因为它能提供更快的安装速度和更好的依赖管理。
|
||||
|
||||
### 方式一:使用 conda
|
||||
|
||||
1. 创建新的 conda 环境:
|
||||
|
||||
```bash
|
||||
@ -38,6 +52,36 @@ cd OpenManus
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 方式二:使用 uv(推荐)
|
||||
|
||||
1. 安装 uv(一个快速的 Python 包管理器):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
|
||||
2. 克隆仓库:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mannaandpoem/OpenManus.git
|
||||
cd OpenManus
|
||||
```
|
||||
|
||||
3. 创建并激活虚拟环境:
|
||||
|
||||
```bash
|
||||
uv venv --python 3.12
|
||||
source .venv/bin/activate # Unix/macOS 系统
|
||||
# Windows 系统使用:
|
||||
# .venv\Scripts\activate
|
||||
```
|
||||
|
||||
4. 安装依赖:
|
||||
|
||||
```bash
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 配置说明
|
||||
|
||||
OpenManus 需要配置使用的 LLM API,请按以下步骤设置:
|
||||
@ -67,6 +111,7 @@ api_key = "sk-..." # 替换为真实 API 密钥
|
||||
```
|
||||
|
||||
## 快速启动
|
||||
|
||||
一行命令运行 OpenManus:
|
||||
|
||||
```bash
|
||||
@ -75,33 +120,50 @@ python main.py
|
||||
|
||||
然后通过终端输入你的创意!
|
||||
|
||||
如需体验开发中版本,可运行:
|
||||
如需体验不稳定的开发版本,可运行:
|
||||
|
||||
```bash
|
||||
python run_flow.py
|
||||
```
|
||||
|
||||
## 贡献指南
|
||||
|
||||
我们欢迎任何友好的建议和有价值的贡献!可以直接创建 issue 或提交 pull request。
|
||||
|
||||
或通过📧邮件联系 @mannaandpoem:mannaandpoem@gmail.com
|
||||
或通过 📧 邮件联系 @mannaandpoem:mannaandpoem@gmail.com
|
||||
|
||||
## 发展路线
|
||||
- [ ] 更优的规划系统
|
||||
- [ ] 实时演示功能
|
||||
- [ ] 运行回放
|
||||
- [ ] 强化学习微调模型
|
||||
- [ ] 全面的性能基准测试
|
||||
**注意**: 在提交 pull request 之前,请使用 pre-commit 工具检查您的更改。运行 `pre-commit run --all-files` 来执行检查。
|
||||
|
||||
## 交流群
|
||||
加入我们的交流群,与其他开发者分享经验!
|
||||
|
||||
加入我们的飞书交流群,与其他开发者分享经验!
|
||||
|
||||
<div align="center" style="display: flex; gap: 20px;">
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群5" width="300" />
|
||||
<img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
|
||||
</div>
|
||||
|
||||
## Star 数量
|
||||
|
||||
[](https://star-history.com/#mannaandpoem/OpenManus&Date)
|
||||
|
||||
## 致谢
|
||||
|
||||
特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) 和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持!
|
||||
特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
|
||||
和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持!
|
||||
|
||||
此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT),[OpenHands](https://github.com/All-Hands-AI/OpenHands) 和 [SWE-agent](https://github.com/SWE-agent/SWE-agent).
|
||||
|
||||
OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力的智能体开发者社区!
|
||||
|
||||
## 引用我们
|
||||
|
||||
```bibtex
|
||||
@misc{openmanus2025,
|
||||
author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong},
|
||||
title = {OpenManus: An open-source framework for building general AI agents},
|
||||
year = {2025},
|
||||
publisher = {GitHub},
|
||||
journal = {GitHub repository},
|
||||
howpublished = {\url{https://github.com/mannaandpoem/OpenManus}},
|
||||
}
|
||||
```
|
||||
|
@ -0,0 +1,10 @@
|
||||
# Python version check: 3.11-3.13
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info < (3, 11) or sys.version_info > (3, 13):
|
||||
print(
|
||||
"Warning: Unsupported Python version {ver}, please use 3.11-3.13".format(
|
||||
ver=".".join(map(str, sys.version_info))
|
||||
)
|
||||
)
|
@ -1,12 +1,12 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import List, Literal, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from app.llm import LLM
|
||||
from app.logger import logger
|
||||
from app.schema import AgentState, Memory, Message
|
||||
from app.schema import ROLE_TYPE, AgentState, Memory, Message
|
||||
|
||||
|
||||
class BaseAgent(BaseModel, ABC):
|
||||
@ -82,8 +82,9 @@ class BaseAgent(BaseModel, ABC):
|
||||
|
||||
def update_memory(
|
||||
self,
|
||||
role: Literal["user", "system", "assistant", "tool"],
|
||||
role: ROLE_TYPE, # type: ignore
|
||||
content: str,
|
||||
base64_image: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Add a message to the agent's memory.
|
||||
@ -91,6 +92,7 @@ class BaseAgent(BaseModel, ABC):
|
||||
Args:
|
||||
role: The role of the message sender (user, system, assistant, tool).
|
||||
content: The message content.
|
||||
base64_image: Optional base64 encoded image.
|
||||
**kwargs: Additional arguments (e.g., tool_call_id for tool messages).
|
||||
|
||||
Raises:
|
||||
@ -106,9 +108,9 @@ class BaseAgent(BaseModel, ABC):
|
||||
if role not in message_map:
|
||||
raise ValueError(f"Unsupported message role: {role}")
|
||||
|
||||
msg_factory = message_map[role]
|
||||
msg = msg_factory(content, **kwargs) if role == "tool" else msg_factory(content)
|
||||
self.memory.add_message(msg)
|
||||
# Create message with appropriate parameters based on role
|
||||
kwargs = {"base64_image": base64_image, **(kwargs if role == "tool" else {})}
|
||||
self.memory.add_message(message_map[role](content, **kwargs))
|
||||
|
||||
async def run(self, request: Optional[str] = None) -> str:
|
||||
"""Execute the agent's main loop asynchronously.
|
||||
@ -144,6 +146,8 @@ class BaseAgent(BaseModel, ABC):
|
||||
results.append(f"Step {self.current_step}: {step_result}")
|
||||
|
||||
if self.current_step >= self.max_steps:
|
||||
self.current_step = 0
|
||||
self.state = AgentState.IDLE
|
||||
results.append(f"Terminated: Reached max steps ({self.max_steps})")
|
||||
|
||||
return "\n".join(results) if results else "No steps executed"
|
||||
|
@ -1,12 +1,20 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from app.agent.toolcall import ToolCallAgent
|
||||
from app.logger import logger
|
||||
from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
|
||||
from app.tool import Terminate, ToolCollection
|
||||
from app.tool.browser_use_tool import BrowserUseTool
|
||||
from app.tool.file_saver import FileSaver
|
||||
from app.tool.google_search import GoogleSearch
|
||||
from app.tool.python_execute import PythonExecute
|
||||
from app.tool.str_replace_editor import StrReplaceEditor
|
||||
|
||||
|
||||
initial_working_directory = Path(os.getcwd()) / "workspace"
|
||||
|
||||
|
||||
class Manus(ToolCallAgent):
|
||||
@ -23,12 +31,64 @@ class Manus(ToolCallAgent):
|
||||
"A versatile agent that can solve various tasks using multiple tools"
|
||||
)
|
||||
|
||||
system_prompt: str = SYSTEM_PROMPT
|
||||
system_prompt: str = SYSTEM_PROMPT.format(directory=initial_working_directory)
|
||||
next_step_prompt: str = NEXT_STEP_PROMPT
|
||||
|
||||
max_observe: int = 10000
|
||||
max_steps: int = 20
|
||||
|
||||
# Add general-purpose tools to the tool collection
|
||||
available_tools: ToolCollection = Field(
|
||||
default_factory=lambda: ToolCollection(
|
||||
PythonExecute(), GoogleSearch(), BrowserUseTool(), FileSaver(), Terminate()
|
||||
PythonExecute(), BrowserUseTool(), StrReplaceEditor(), Terminate()
|
||||
)
|
||||
)
|
||||
|
||||
async def _handle_special_tool(self, name: str, result: Any, **kwargs):
|
||||
if not self._is_special_tool(name):
|
||||
return
|
||||
else:
|
||||
await self.available_tools.get_tool(BrowserUseTool().name).cleanup()
|
||||
await super()._handle_special_tool(name, result, **kwargs)
|
||||
|
||||
async def get_browser_state(self) -> Optional[dict]:
|
||||
"""Get the current browser state for context in next steps."""
|
||||
browser_tool = self.available_tools.get_tool(BrowserUseTool().name)
|
||||
if not browser_tool:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Get browser state directly from the tool with no context parameter
|
||||
result = await browser_tool.get_current_state()
|
||||
|
||||
if result.error:
|
||||
logger.debug(f"Browser state error: {result.error}")
|
||||
return None
|
||||
|
||||
# Store screenshot if available
|
||||
if hasattr(result, "base64_image") and result.base64_image:
|
||||
self._current_base64_image = result.base64_image
|
||||
|
||||
# Parse the state info
|
||||
return json.loads(result.output)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to get browser state: {str(e)}")
|
||||
return None
|
||||
|
||||
async def think(self) -> bool:
|
||||
# Add your custom pre-processing here
|
||||
browser_state = await self.get_browser_state()
|
||||
|
||||
# Modify the next_step_prompt temporarily
|
||||
original_prompt = self.next_step_prompt
|
||||
if browser_state and not browser_state.get("error"):
|
||||
self.next_step_prompt += f"\nCurrent browser state:\nURL: {browser_state.get('url', 'N/A')}\nTitle: {browser_state.get('title', 'N/A')}\n"
|
||||
|
||||
# Call parent implementation
|
||||
result = await super().think()
|
||||
|
||||
# Restore original prompt
|
||||
self.next_step_prompt = original_prompt
|
||||
|
||||
return result
|
||||
|
@ -1,12 +1,12 @@
|
||||
import time
|
||||
from typing import Dict, List, Literal, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import Field, model_validator
|
||||
|
||||
from app.agent.toolcall import ToolCallAgent
|
||||
from app.logger import logger
|
||||
from app.prompt.planning import NEXT_STEP_PROMPT, PLANNING_SYSTEM_PROMPT
|
||||
from app.schema import Message, ToolCall
|
||||
from app.schema import TOOL_CHOICE_TYPE, Message, ToolCall, ToolChoice
|
||||
from app.tool import PlanningTool, Terminate, ToolCollection
|
||||
|
||||
|
||||
@ -27,7 +27,7 @@ class PlanningAgent(ToolCallAgent):
|
||||
available_tools: ToolCollection = Field(
|
||||
default_factory=lambda: ToolCollection(PlanningTool(), Terminate())
|
||||
)
|
||||
tool_choices: Literal["none", "auto", "required"] = "auto"
|
||||
tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore
|
||||
special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
|
||||
|
||||
tool_calls: List[ToolCall] = Field(default_factory=list)
|
||||
@ -212,7 +212,7 @@ class PlanningAgent(ToolCallAgent):
|
||||
messages=messages,
|
||||
system_msgs=[Message.system_message(self.system_prompt)],
|
||||
tools=self.available_tools.to_params(),
|
||||
tool_choice="required",
|
||||
tool_choice=ToolChoice.AUTO,
|
||||
)
|
||||
assistant_msg = Message.from_tool_calls(
|
||||
content=response.content, tool_calls=response.tool_calls
|
||||
|
@ -1,12 +1,13 @@
|
||||
import json
|
||||
from typing import Any, List, Literal
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from app.agent.react import ReActAgent
|
||||
from app.exceptions import TokenLimitExceeded
|
||||
from app.logger import logger
|
||||
from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
|
||||
from app.schema import AgentState, Message, ToolCall
|
||||
from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice
|
||||
from app.tool import CreateChatCompletion, Terminate, ToolCollection
|
||||
|
||||
|
||||
@ -25,12 +26,14 @@ class ToolCallAgent(ReActAgent):
|
||||
available_tools: ToolCollection = ToolCollection(
|
||||
CreateChatCompletion(), Terminate()
|
||||
)
|
||||
tool_choices: Literal["none", "auto", "required"] = "auto"
|
||||
tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore
|
||||
special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
|
||||
|
||||
tool_calls: List[ToolCall] = Field(default_factory=list)
|
||||
_current_base64_image: Optional[str] = None
|
||||
|
||||
max_steps: int = 30
|
||||
max_observe: Optional[Union[int, bool]] = None
|
||||
|
||||
async def think(self) -> bool:
|
||||
"""Process current state and decide next actions using tools"""
|
||||
@ -38,15 +41,36 @@ class ToolCallAgent(ReActAgent):
|
||||
user_msg = Message.user_message(self.next_step_prompt)
|
||||
self.messages += [user_msg]
|
||||
|
||||
# Get response with tool options
|
||||
response = await self.llm.ask_tool(
|
||||
messages=self.messages,
|
||||
system_msgs=[Message.system_message(self.system_prompt)]
|
||||
if self.system_prompt
|
||||
else None,
|
||||
tools=self.available_tools.to_params(),
|
||||
tool_choice=self.tool_choices,
|
||||
)
|
||||
try:
|
||||
# Get response with tool options
|
||||
response = await self.llm.ask_tool(
|
||||
messages=self.messages,
|
||||
system_msgs=(
|
||||
[Message.system_message(self.system_prompt)]
|
||||
if self.system_prompt
|
||||
else None
|
||||
),
|
||||
tools=self.available_tools.to_params(),
|
||||
tool_choice=self.tool_choices,
|
||||
)
|
||||
except ValueError:
|
||||
raise
|
||||
except Exception as e:
|
||||
# Check if this is a RetryError containing TokenLimitExceeded
|
||||
if hasattr(e, "__cause__") and isinstance(e.__cause__, TokenLimitExceeded):
|
||||
token_limit_error = e.__cause__
|
||||
logger.error(
|
||||
f"🚨 Token limit error (from RetryError): {token_limit_error}"
|
||||
)
|
||||
self.memory.add_message(
|
||||
Message.assistant_message(
|
||||
f"Maximum token limit reached, cannot continue execution: {str(token_limit_error)}"
|
||||
)
|
||||
)
|
||||
self.state = AgentState.FINISHED
|
||||
return False
|
||||
raise
|
||||
|
||||
self.tool_calls = response.tool_calls
|
||||
|
||||
# Log response info
|
||||
@ -58,10 +82,13 @@ class ToolCallAgent(ReActAgent):
|
||||
logger.info(
|
||||
f"🧰 Tools being prepared: {[call.function.name for call in response.tool_calls]}"
|
||||
)
|
||||
logger.info(
|
||||
f"🔧 Tool arguments: {response.tool_calls[0].function.arguments}"
|
||||
)
|
||||
|
||||
try:
|
||||
# Handle different tool_choices modes
|
||||
if self.tool_choices == "none":
|
||||
if self.tool_choices == ToolChoice.NONE:
|
||||
if response.tool_calls:
|
||||
logger.warning(
|
||||
f"🤔 Hmm, {self.name} tried to use tools when they weren't available!"
|
||||
@ -81,11 +108,11 @@ class ToolCallAgent(ReActAgent):
|
||||
)
|
||||
self.memory.add_message(assistant_msg)
|
||||
|
||||
if self.tool_choices == "required" and not self.tool_calls:
|
||||
if self.tool_choices == ToolChoice.REQUIRED and not self.tool_calls:
|
||||
return True # Will be handled in act()
|
||||
|
||||
# For 'auto' mode, continue with content if no commands but content exists
|
||||
if self.tool_choices == "auto" and not self.tool_calls:
|
||||
if self.tool_choices == ToolChoice.AUTO and not self.tool_calls:
|
||||
return bool(response.content)
|
||||
|
||||
return bool(self.tool_calls)
|
||||
@ -101,7 +128,7 @@ class ToolCallAgent(ReActAgent):
|
||||
async def act(self) -> str:
|
||||
"""Execute tool calls and handle their results"""
|
||||
if not self.tool_calls:
|
||||
if self.tool_choices == "required":
|
||||
if self.tool_choices == ToolChoice.REQUIRED:
|
||||
raise ValueError(TOOL_CALL_REQUIRED)
|
||||
|
||||
# Return last message content if no tool calls
|
||||
@ -109,14 +136,24 @@ class ToolCallAgent(ReActAgent):
|
||||
|
||||
results = []
|
||||
for command in self.tool_calls:
|
||||
# Reset base64_image for each tool call
|
||||
self._current_base64_image = None
|
||||
|
||||
result = await self.execute_tool(command)
|
||||
|
||||
if self.max_observe:
|
||||
result = result[: self.max_observe]
|
||||
|
||||
logger.info(
|
||||
f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}"
|
||||
)
|
||||
|
||||
# Add tool response to memory
|
||||
tool_msg = Message.tool_message(
|
||||
content=result, tool_call_id=command.id, name=command.function.name
|
||||
content=result,
|
||||
tool_call_id=command.id,
|
||||
name=command.function.name,
|
||||
base64_image=self._current_base64_image,
|
||||
)
|
||||
self.memory.add_message(tool_msg)
|
||||
results.append(result)
|
||||
@ -140,21 +177,34 @@ class ToolCallAgent(ReActAgent):
|
||||
logger.info(f"🔧 Activating tool: '{name}'...")
|
||||
result = await self.available_tools.execute(name=name, tool_input=args)
|
||||
|
||||
# Format result for display
|
||||
# Handle special tools
|
||||
await self._handle_special_tool(name=name, result=result)
|
||||
|
||||
# Check if result is a ToolResult with base64_image
|
||||
if hasattr(result, "base64_image") and result.base64_image:
|
||||
# Store the base64_image for later use in tool_message
|
||||
self._current_base64_image = result.base64_image
|
||||
|
||||
# Format result for display
|
||||
observation = (
|
||||
f"Observed output of cmd `{name}` executed:\n{str(result)}"
|
||||
if result
|
||||
else f"Cmd `{name}` completed with no output"
|
||||
)
|
||||
return observation
|
||||
|
||||
# Format result for display (standard case)
|
||||
observation = (
|
||||
f"Observed output of cmd `{name}` executed:\n{str(result)}"
|
||||
if result
|
||||
else f"Cmd `{name}` completed with no output"
|
||||
)
|
||||
|
||||
# Handle special tools like `finish`
|
||||
await self._handle_special_tool(name=name, result=result)
|
||||
|
||||
return observation
|
||||
except json.JSONDecodeError:
|
||||
error_msg = f"Error parsing arguments for {name}: Invalid JSON format"
|
||||
logger.error(
|
||||
f"📝 Oops! The arguments for '{name}' don't make sense - invalid JSON"
|
||||
f"📝 Oops! The arguments for '{name}' don't make sense - invalid JSON, arguments:{command.function.arguments}"
|
||||
)
|
||||
return f"Error: {error_msg}"
|
||||
except Exception as e:
|
||||
|
123
app/config.py
123
app/config.py
@ -1,7 +1,7 @@
|
||||
import threading
|
||||
import tomllib
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -20,10 +20,51 @@ class LLMSettings(BaseModel):
|
||||
base_url: str = Field(..., description="API base URL")
|
||||
api_key: str = Field(..., description="API key")
|
||||
max_tokens: int = Field(4096, description="Maximum number of tokens per request")
|
||||
max_input_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="Maximum input tokens to use across all requests (None for unlimited)",
|
||||
)
|
||||
temperature: float = Field(1.0, description="Sampling temperature")
|
||||
api_type: str = Field(..., description="AzureOpenai or Openai")
|
||||
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
|
||||
|
||||
|
||||
class SandboxConfig(BaseModel):
|
||||
class ProxySettings(BaseModel):
|
||||
server: str = Field(None, description="Proxy server address")
|
||||
username: Optional[str] = Field(None, description="Proxy username")
|
||||
password: Optional[str] = Field(None, description="Proxy password")
|
||||
|
||||
|
||||
class SearchSettings(BaseModel):
|
||||
engine: str = Field(default="Google", description="Search engine the llm to use")
|
||||
|
||||
|
||||
class BrowserSettings(BaseModel):
|
||||
headless: bool = Field(False, description="Whether to run browser in headless mode")
|
||||
disable_security: bool = Field(
|
||||
True, description="Disable browser security features"
|
||||
)
|
||||
extra_chromium_args: List[str] = Field(
|
||||
default_factory=list, description="Extra arguments to pass to the browser"
|
||||
)
|
||||
chrome_instance_path: Optional[str] = Field(
|
||||
None, description="Path to a Chrome instance to use"
|
||||
)
|
||||
wss_url: Optional[str] = Field(
|
||||
None, description="Connect to a browser instance via WebSocket"
|
||||
)
|
||||
cdp_url: Optional[str] = Field(
|
||||
None, description="Connect to a browser instance via CDP"
|
||||
)
|
||||
proxy: Optional[ProxySettings] = Field(
|
||||
None, description="Proxy settings for the browser"
|
||||
)
|
||||
max_content_length: int = Field(
|
||||
2000, description="Maximum length for content retrieval operations"
|
||||
)
|
||||
|
||||
|
||||
class SandboxSettings(BaseModel):
|
||||
"""Configuration for the execution sandbox"""
|
||||
|
||||
use_sandbox: bool = Field(False, description="Whether to use the sandbox")
|
||||
@ -39,7 +80,18 @@ class SandboxConfig(BaseModel):
|
||||
|
||||
class AppConfig(BaseModel):
|
||||
llm: Dict[str, LLMSettings]
|
||||
sandbox: SandboxConfig
|
||||
sandbox: Optional[SandboxSettings] = Field(
|
||||
None, description="Sandbox configuration"
|
||||
)
|
||||
browser_config: Optional[BrowserSettings] = Field(
|
||||
None, description="Browser configuration"
|
||||
)
|
||||
search_config: Optional[SearchSettings] = Field(
|
||||
None, description="Search configuration"
|
||||
)
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
|
||||
class Config:
|
||||
@ -90,9 +142,55 @@ class Config:
|
||||
"base_url": base_llm.get("base_url"),
|
||||
"api_key": base_llm.get("api_key"),
|
||||
"max_tokens": base_llm.get("max_tokens", 4096),
|
||||
"max_input_tokens": base_llm.get("max_input_tokens"),
|
||||
"temperature": base_llm.get("temperature", 1.0),
|
||||
"api_type": base_llm.get("api_type", ""),
|
||||
"api_version": base_llm.get("api_version", ""),
|
||||
}
|
||||
|
||||
# handle browser config.
|
||||
browser_config = raw_config.get("browser", {})
|
||||
browser_settings = None
|
||||
|
||||
if browser_config:
|
||||
# handle proxy settings.
|
||||
proxy_config = browser_config.get("proxy", {})
|
||||
proxy_settings = None
|
||||
|
||||
if proxy_config and proxy_config.get("server"):
|
||||
proxy_settings = ProxySettings(
|
||||
**{
|
||||
k: v
|
||||
for k, v in proxy_config.items()
|
||||
if k in ["server", "username", "password"] and v
|
||||
}
|
||||
)
|
||||
|
||||
# filter valid browser config parameters.
|
||||
valid_browser_params = {
|
||||
k: v
|
||||
for k, v in browser_config.items()
|
||||
if k in BrowserSettings.__annotations__ and v is not None
|
||||
}
|
||||
|
||||
# if there is proxy settings, add it to the parameters.
|
||||
if proxy_settings:
|
||||
valid_browser_params["proxy"] = proxy_settings
|
||||
|
||||
# only create BrowserSettings when there are valid parameters.
|
||||
if valid_browser_params:
|
||||
browser_settings = BrowserSettings(**valid_browser_params)
|
||||
|
||||
search_config = raw_config.get("search", {})
|
||||
search_settings = None
|
||||
if search_config:
|
||||
search_settings = SearchSettings(**search_config)
|
||||
sandbox_config = raw_config.get("sandbox", {})
|
||||
if sandbox_config:
|
||||
sandbox_settings = SandboxSettings(**sandbox_config)
|
||||
else:
|
||||
sandbox_settings = SandboxSettings()
|
||||
|
||||
config_dict = {
|
||||
"llm": {
|
||||
"default": default_settings,
|
||||
@ -101,7 +199,9 @@ class Config:
|
||||
for name, override_config in llm_overrides.items()
|
||||
},
|
||||
},
|
||||
"sandbox": raw_config.get("sandbox", {}),
|
||||
"sandbox": sandbox_settings,
|
||||
"browser_config": browser_settings,
|
||||
"search_config": search_settings,
|
||||
}
|
||||
|
||||
self._config = AppConfig(**config_dict)
|
||||
@ -110,9 +210,18 @@ class Config:
|
||||
def llm(self) -> Dict[str, LLMSettings]:
|
||||
return self._config.llm
|
||||
|
||||
@property
|
||||
def sandbox(self) -> SandboxConfig:
|
||||
return self._config.sandbox
|
||||
|
||||
def sandbox(self) -> SandboxSettings:
|
||||
return self._config.sandbox
|
||||
|
||||
|
||||
def browser_config(self) -> Optional[BrowserSettings]:
|
||||
return self._config.browser_config
|
||||
|
||||
|
||||
@property
|
||||
def search_config(self) -> Optional[SearchSettings]:
|
||||
return self._config.search_config
|
||||
|
||||
|
||||
config = Config()
|
||||
|
@ -3,3 +3,11 @@ class ToolError(Exception):
|
||||
|
||||
def __init__(self, message):
|
||||
self.message = message
|
||||
|
||||
|
||||
class OpenManusError(Exception):
|
||||
"""Base exception for all OpenManus errors"""
|
||||
|
||||
|
||||
class TokenLimitExceeded(OpenManusError):
|
||||
"""Exception raised when the token limit is exceeded"""
|
||||
|
@ -60,3 +60,32 @@ class BaseFlow(BaseModel, ABC):
|
||||
@abstractmethod
|
||||
async def execute(self, input_text: str) -> str:
|
||||
"""Execute the flow with given input"""
|
||||
|
||||
|
||||
class PlanStepStatus(str, Enum):
|
||||
"""Enum class defining possible statuses of a plan step"""
|
||||
|
||||
NOT_STARTED = "not_started"
|
||||
IN_PROGRESS = "in_progress"
|
||||
COMPLETED = "completed"
|
||||
BLOCKED = "blocked"
|
||||
|
||||
@classmethod
|
||||
def get_all_statuses(cls) -> list[str]:
|
||||
"""Return a list of all possible step status values"""
|
||||
return [status.value for status in cls]
|
||||
|
||||
@classmethod
|
||||
def get_active_statuses(cls) -> list[str]:
|
||||
"""Return a list of values representing active statuses (not started or in progress)"""
|
||||
return [cls.NOT_STARTED.value, cls.IN_PROGRESS.value]
|
||||
|
||||
@classmethod
|
||||
def get_status_marks(cls) -> Dict[str, str]:
|
||||
"""Return a mapping of statuses to their marker symbols"""
|
||||
return {
|
||||
cls.COMPLETED.value: "[✓]",
|
||||
cls.IN_PROGRESS.value: "[→]",
|
||||
cls.BLOCKED.value: "[!]",
|
||||
cls.NOT_STARTED.value: "[ ]",
|
||||
}
|
||||
|
@ -5,10 +5,10 @@ from typing import Dict, List, Optional, Union
|
||||
from pydantic import Field
|
||||
|
||||
from app.agent.base import BaseAgent
|
||||
from app.flow.base import BaseFlow
|
||||
from app.flow.base import BaseFlow, PlanStepStatus
|
||||
from app.llm import LLM
|
||||
from app.logger import logger
|
||||
from app.schema import AgentState, Message
|
||||
from app.schema import AgentState, Message, ToolChoice
|
||||
from app.tool import PlanningTool
|
||||
|
||||
|
||||
@ -109,12 +109,14 @@ class PlanningFlow(BaseFlow):
|
||||
|
||||
# Create a system message for plan creation
|
||||
system_message = Message.system_message(
|
||||
"You are a planning assistant. Your task is to create a detailed plan with clear steps."
|
||||
"You are a planning assistant. Create a concise, actionable plan with clear steps. "
|
||||
"Focus on key milestones rather than detailed sub-steps. "
|
||||
"Optimize for clarity and efficiency."
|
||||
)
|
||||
|
||||
# Create a user message with the request
|
||||
user_message = Message.user_message(
|
||||
f"Create a detailed plan to accomplish this task: {request}"
|
||||
f"Create a reasonable plan with clear steps to accomplish the task: {request}"
|
||||
)
|
||||
|
||||
# Call LLM with PlanningTool
|
||||
@ -122,7 +124,7 @@ class PlanningFlow(BaseFlow):
|
||||
messages=[user_message],
|
||||
system_msgs=[system_message],
|
||||
tools=[self.planning_tool.to_param()],
|
||||
tool_choice="required",
|
||||
tool_choice=ToolChoice.AUTO,
|
||||
)
|
||||
|
||||
# Process tool calls if present
|
||||
@ -181,11 +183,11 @@ class PlanningFlow(BaseFlow):
|
||||
# Find first non-completed step
|
||||
for i, step in enumerate(steps):
|
||||
if i >= len(step_statuses):
|
||||
status = "not_started"
|
||||
status = PlanStepStatus.NOT_STARTED.value
|
||||
else:
|
||||
status = step_statuses[i]
|
||||
|
||||
if status in ["not_started", "in_progress"]:
|
||||
if status in PlanStepStatus.get_active_statuses():
|
||||
# Extract step type/category if available
|
||||
step_info = {"text": step}
|
||||
|
||||
@ -202,17 +204,17 @@ class PlanningFlow(BaseFlow):
|
||||
command="mark_step",
|
||||
plan_id=self.active_plan_id,
|
||||
step_index=i,
|
||||
step_status="in_progress",
|
||||
step_status=PlanStepStatus.IN_PROGRESS.value,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error marking step as in_progress: {e}")
|
||||
# Update step status directly if needed
|
||||
if i < len(step_statuses):
|
||||
step_statuses[i] = "in_progress"
|
||||
step_statuses[i] = PlanStepStatus.IN_PROGRESS.value
|
||||
else:
|
||||
while len(step_statuses) < i:
|
||||
step_statuses.append("not_started")
|
||||
step_statuses.append("in_progress")
|
||||
step_statuses.append(PlanStepStatus.NOT_STARTED.value)
|
||||
step_statuses.append(PlanStepStatus.IN_PROGRESS.value)
|
||||
|
||||
plan_data["step_statuses"] = step_statuses
|
||||
|
||||
@ -264,7 +266,7 @@ class PlanningFlow(BaseFlow):
|
||||
command="mark_step",
|
||||
plan_id=self.active_plan_id,
|
||||
step_index=self.current_step_index,
|
||||
step_status="completed",
|
||||
step_status=PlanStepStatus.COMPLETED.value,
|
||||
)
|
||||
logger.info(
|
||||
f"Marked step {self.current_step_index} as completed in plan {self.active_plan_id}"
|
||||
@ -278,10 +280,10 @@ class PlanningFlow(BaseFlow):
|
||||
|
||||
# Ensure the step_statuses list is long enough
|
||||
while len(step_statuses) <= self.current_step_index:
|
||||
step_statuses.append("not_started")
|
||||
step_statuses.append(PlanStepStatus.NOT_STARTED.value)
|
||||
|
||||
# Update the status
|
||||
step_statuses[self.current_step_index] = "completed"
|
||||
step_statuses[self.current_step_index] = PlanStepStatus.COMPLETED.value
|
||||
plan_data["step_statuses"] = step_statuses
|
||||
|
||||
async def _get_plan_text(self) -> str:
|
||||
@ -309,23 +311,18 @@ class PlanningFlow(BaseFlow):
|
||||
|
||||
# Ensure step_statuses and step_notes match the number of steps
|
||||
while len(step_statuses) < len(steps):
|
||||
step_statuses.append("not_started")
|
||||
step_statuses.append(PlanStepStatus.NOT_STARTED.value)
|
||||
while len(step_notes) < len(steps):
|
||||
step_notes.append("")
|
||||
|
||||
# Count steps by status
|
||||
status_counts = {
|
||||
"completed": 0,
|
||||
"in_progress": 0,
|
||||
"blocked": 0,
|
||||
"not_started": 0,
|
||||
}
|
||||
status_counts = {status: 0 for status in PlanStepStatus.get_all_statuses()}
|
||||
|
||||
for status in step_statuses:
|
||||
if status in status_counts:
|
||||
status_counts[status] += 1
|
||||
|
||||
completed = status_counts["completed"]
|
||||
completed = status_counts[PlanStepStatus.COMPLETED.value]
|
||||
total = len(steps)
|
||||
progress = (completed / total) * 100 if total > 0 else 0
|
||||
|
||||
@ -335,21 +332,19 @@ class PlanningFlow(BaseFlow):
|
||||
plan_text += (
|
||||
f"Progress: {completed}/{total} steps completed ({progress:.1f}%)\n"
|
||||
)
|
||||
plan_text += f"Status: {status_counts['completed']} completed, {status_counts['in_progress']} in progress, "
|
||||
plan_text += f"{status_counts['blocked']} blocked, {status_counts['not_started']} not started\n\n"
|
||||
plan_text += f"Status: {status_counts[PlanStepStatus.COMPLETED.value]} completed, {status_counts[PlanStepStatus.IN_PROGRESS.value]} in progress, "
|
||||
plan_text += f"{status_counts[PlanStepStatus.BLOCKED.value]} blocked, {status_counts[PlanStepStatus.NOT_STARTED.value]} not started\n\n"
|
||||
plan_text += "Steps:\n"
|
||||
|
||||
status_marks = PlanStepStatus.get_status_marks()
|
||||
|
||||
for i, (step, status, notes) in enumerate(
|
||||
zip(steps, step_statuses, step_notes)
|
||||
):
|
||||
if status == "completed":
|
||||
status_mark = "[✓]"
|
||||
elif status == "in_progress":
|
||||
status_mark = "[→]"
|
||||
elif status == "blocked":
|
||||
status_mark = "[!]"
|
||||
else: # not_started
|
||||
status_mark = "[ ]"
|
||||
# Use status marks to indicate step status
|
||||
status_mark = status_marks.get(
|
||||
status, status_marks[PlanStepStatus.NOT_STARTED.value]
|
||||
)
|
||||
|
||||
plan_text += f"{i}. {status_mark} {step}\n"
|
||||
if notes:
|
||||
|
560
app/llm.py
560
app/llm.py
@ -1,17 +1,171 @@
|
||||
from typing import Dict, List, Literal, Optional, Union
|
||||
import math
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import tiktoken
|
||||
from openai import (
|
||||
APIError,
|
||||
AsyncAzureOpenAI,
|
||||
AsyncOpenAI,
|
||||
AuthenticationError,
|
||||
OpenAIError,
|
||||
RateLimitError,
|
||||
)
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
from tenacity import (
|
||||
retry,
|
||||
retry_if_exception_type,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
|
||||
from app.config import LLMSettings, config
|
||||
from app.exceptions import TokenLimitExceeded
|
||||
from app.logger import logger # Assuming a logger is set up in your app
|
||||
from app.schema import Message
|
||||
from app.schema import (
|
||||
ROLE_VALUES,
|
||||
TOOL_CHOICE_TYPE,
|
||||
TOOL_CHOICE_VALUES,
|
||||
Message,
|
||||
ToolChoice,
|
||||
)
|
||||
|
||||
|
||||
REASONING_MODELS = ["o1", "o3-mini"]
|
||||
|
||||
|
||||
class TokenCounter:
|
||||
# Token constants
|
||||
BASE_MESSAGE_TOKENS = 4
|
||||
FORMAT_TOKENS = 2
|
||||
LOW_DETAIL_IMAGE_TOKENS = 85
|
||||
HIGH_DETAIL_TILE_TOKENS = 170
|
||||
|
||||
# Image processing constants
|
||||
MAX_SIZE = 2048
|
||||
HIGH_DETAIL_TARGET_SHORT_SIDE = 768
|
||||
TILE_SIZE = 512
|
||||
|
||||
def __init__(self, tokenizer):
|
||||
self.tokenizer = tokenizer
|
||||
|
||||
def count_text(self, text: str) -> int:
|
||||
"""Calculate tokens for a text string"""
|
||||
return 0 if not text else len(self.tokenizer.encode(text))
|
||||
|
||||
def count_image(self, image_item: dict) -> int:
|
||||
"""
|
||||
Calculate tokens for an image based on detail level and dimensions
|
||||
|
||||
For "low" detail: fixed 85 tokens
|
||||
For "high" detail:
|
||||
1. Scale to fit in 2048x2048 square
|
||||
2. Scale shortest side to 768px
|
||||
3. Count 512px tiles (170 tokens each)
|
||||
4. Add 85 tokens
|
||||
"""
|
||||
detail = image_item.get("detail", "medium")
|
||||
|
||||
# For low detail, always return fixed token count
|
||||
if detail == "low":
|
||||
return self.LOW_DETAIL_IMAGE_TOKENS
|
||||
|
||||
# For medium detail (default in OpenAI), use high detail calculation
|
||||
# OpenAI doesn't specify a separate calculation for medium
|
||||
|
||||
# For high detail, calculate based on dimensions if available
|
||||
if detail == "high" or detail == "medium":
|
||||
# If dimensions are provided in the image_item
|
||||
if "dimensions" in image_item:
|
||||
width, height = image_item["dimensions"]
|
||||
return self._calculate_high_detail_tokens(width, height)
|
||||
|
||||
# Default values when dimensions aren't available or detail level is unknown
|
||||
if detail == "high":
|
||||
# Default to a 1024x1024 image calculation for high detail
|
||||
return self._calculate_high_detail_tokens(1024, 1024) # 765 tokens
|
||||
elif detail == "medium":
|
||||
# Default to a medium-sized image for medium detail
|
||||
return 1024 # This matches the original default
|
||||
else:
|
||||
# For unknown detail levels, use medium as default
|
||||
return 1024
|
||||
|
||||
def _calculate_high_detail_tokens(self, width: int, height: int) -> int:
|
||||
"""Calculate tokens for high detail images based on dimensions"""
|
||||
# Step 1: Scale to fit in MAX_SIZE x MAX_SIZE square
|
||||
if width > self.MAX_SIZE or height > self.MAX_SIZE:
|
||||
scale = self.MAX_SIZE / max(width, height)
|
||||
width = int(width * scale)
|
||||
height = int(height * scale)
|
||||
|
||||
# Step 2: Scale so shortest side is HIGH_DETAIL_TARGET_SHORT_SIDE
|
||||
scale = self.HIGH_DETAIL_TARGET_SHORT_SIDE / min(width, height)
|
||||
scaled_width = int(width * scale)
|
||||
scaled_height = int(height * scale)
|
||||
|
||||
# Step 3: Count number of 512px tiles
|
||||
tiles_x = math.ceil(scaled_width / self.TILE_SIZE)
|
||||
tiles_y = math.ceil(scaled_height / self.TILE_SIZE)
|
||||
total_tiles = tiles_x * tiles_y
|
||||
|
||||
# Step 4: Calculate final token count
|
||||
return (
|
||||
total_tiles * self.HIGH_DETAIL_TILE_TOKENS
|
||||
) + self.LOW_DETAIL_IMAGE_TOKENS
|
||||
|
||||
def count_content(self, content: Union[str, List[Union[str, dict]]]) -> int:
|
||||
"""Calculate tokens for message content"""
|
||||
if not content:
|
||||
return 0
|
||||
|
||||
if isinstance(content, str):
|
||||
return self.count_text(content)
|
||||
|
||||
token_count = 0
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
token_count += self.count_text(item)
|
||||
elif isinstance(item, dict):
|
||||
if "text" in item:
|
||||
token_count += self.count_text(item["text"])
|
||||
elif "image_url" in item:
|
||||
token_count += self.count_image(item)
|
||||
return token_count
|
||||
|
||||
def count_tool_calls(self, tool_calls: List[dict]) -> int:
|
||||
"""Calculate tokens for tool calls"""
|
||||
token_count = 0
|
||||
for tool_call in tool_calls:
|
||||
if "function" in tool_call:
|
||||
function = tool_call["function"]
|
||||
token_count += self.count_text(function.get("name", ""))
|
||||
token_count += self.count_text(function.get("arguments", ""))
|
||||
return token_count
|
||||
|
||||
def count_message_tokens(self, messages: List[dict]) -> int:
|
||||
"""Calculate the total number of tokens in a message list"""
|
||||
total_tokens = self.FORMAT_TOKENS # Base format tokens
|
||||
|
||||
for message in messages:
|
||||
tokens = self.BASE_MESSAGE_TOKENS # Base tokens per message
|
||||
|
||||
# Add role tokens
|
||||
tokens += self.count_text(message.get("role", ""))
|
||||
|
||||
# Add content tokens
|
||||
if "content" in message:
|
||||
tokens += self.count_content(message["content"])
|
||||
|
||||
# Add tool calls tokens
|
||||
if "tool_calls" in message:
|
||||
tokens += self.count_tool_calls(message["tool_calls"])
|
||||
|
||||
# Add name and tool_call_id tokens
|
||||
tokens += self.count_text(message.get("name", ""))
|
||||
tokens += self.count_text(message.get("tool_call_id", ""))
|
||||
|
||||
total_tokens += tokens
|
||||
|
||||
return total_tokens
|
||||
|
||||
|
||||
class LLM:
|
||||
@ -35,10 +189,71 @@ class LLM:
|
||||
self.model = llm_config.model
|
||||
self.max_tokens = llm_config.max_tokens
|
||||
self.temperature = llm_config.temperature
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=llm_config.api_key, base_url=llm_config.base_url
|
||||
self.api_type = llm_config.api_type
|
||||
self.api_key = llm_config.api_key
|
||||
self.api_version = llm_config.api_version
|
||||
self.base_url = llm_config.base_url
|
||||
|
||||
# Add token counting related attributes
|
||||
self.total_input_tokens = 0
|
||||
self.max_input_tokens = (
|
||||
llm_config.max_input_tokens
|
||||
if hasattr(llm_config, "max_input_tokens")
|
||||
else None
|
||||
)
|
||||
|
||||
# Initialize tokenizer
|
||||
try:
|
||||
self.tokenizer = tiktoken.encoding_for_model(self.model)
|
||||
except KeyError:
|
||||
# If the model is not in tiktoken's presets, use cl100k_base as default
|
||||
self.tokenizer = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
if self.api_type == "azure":
|
||||
self.client = AsyncAzureOpenAI(
|
||||
base_url=self.base_url,
|
||||
api_key=self.api_key,
|
||||
api_version=self.api_version,
|
||||
)
|
||||
else:
|
||||
self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
self.token_counter = TokenCounter(self.tokenizer)
|
||||
|
||||
def count_tokens(self, text: str) -> int:
|
||||
"""Calculate the number of tokens in a text"""
|
||||
if not text:
|
||||
return 0
|
||||
return len(self.tokenizer.encode(text))
|
||||
|
||||
def count_message_tokens(self, messages: List[dict]) -> int:
|
||||
return self.token_counter.count_message_tokens(messages)
|
||||
|
||||
def update_token_count(self, input_tokens: int) -> None:
|
||||
"""Update token counts"""
|
||||
# Only track tokens if max_input_tokens is set
|
||||
self.total_input_tokens += input_tokens
|
||||
logger.info(
|
||||
f"Token usage: Input={input_tokens}, Cumulative Input={self.total_input_tokens}"
|
||||
)
|
||||
|
||||
def check_token_limit(self, input_tokens: int) -> bool:
|
||||
"""Check if token limits are exceeded"""
|
||||
if self.max_input_tokens is not None:
|
||||
return (self.total_input_tokens + input_tokens) <= self.max_input_tokens
|
||||
# If max_input_tokens is not set, always return True
|
||||
return True
|
||||
|
||||
def get_limit_error_message(self, input_tokens: int) -> str:
|
||||
"""Generate error message for token limit exceeded"""
|
||||
if (
|
||||
self.max_input_tokens is not None
|
||||
and (self.total_input_tokens + input_tokens) > self.max_input_tokens
|
||||
):
|
||||
return f"Request may exceed input token limit (Current: {self.total_input_tokens}, Needed: {input_tokens}, Max: {self.max_input_tokens})"
|
||||
|
||||
return "Token limit exceeded"
|
||||
|
||||
@staticmethod
|
||||
def format_messages(messages: List[Union[dict, Message]]) -> List[dict]:
|
||||
"""
|
||||
@ -65,31 +280,67 @@ class LLM:
|
||||
formatted_messages = []
|
||||
|
||||
for message in messages:
|
||||
if isinstance(message, dict):
|
||||
# If message is already a dict, ensure it has required fields
|
||||
if "role" not in message:
|
||||
raise ValueError("Message dict must contain 'role' field")
|
||||
formatted_messages.append(message)
|
||||
elif isinstance(message, Message):
|
||||
# If message is a Message object, convert it to dict
|
||||
formatted_messages.append(message.to_dict())
|
||||
else:
|
||||
# Convert Message objects to dictionaries
|
||||
if isinstance(message, Message):
|
||||
message = message.to_dict()
|
||||
|
||||
if not isinstance(message, dict):
|
||||
raise TypeError(f"Unsupported message type: {type(message)}")
|
||||
|
||||
# Validate all messages have required fields
|
||||
for msg in formatted_messages:
|
||||
if msg["role"] not in ["system", "user", "assistant", "tool"]:
|
||||
raise ValueError(f"Invalid role: {msg['role']}")
|
||||
if "content" not in msg and "tool_calls" not in msg:
|
||||
raise ValueError(
|
||||
"Message must contain either 'content' or 'tool_calls'"
|
||||
# Validate required fields
|
||||
if "role" not in message:
|
||||
raise ValueError("Message dict must contain 'role' field")
|
||||
|
||||
# Process base64 images if present
|
||||
if message.get("base64_image"):
|
||||
# Initialize or convert content to appropriate format
|
||||
if not message.get("content"):
|
||||
message["content"] = []
|
||||
elif isinstance(message["content"], str):
|
||||
message["content"] = [{"type": "text", "text": message["content"]}]
|
||||
elif isinstance(message["content"], list):
|
||||
# Convert string items to proper text objects
|
||||
message["content"] = [
|
||||
(
|
||||
{"type": "text", "text": item}
|
||||
if isinstance(item, str)
|
||||
else item
|
||||
)
|
||||
for item in message["content"]
|
||||
]
|
||||
|
||||
# Add the image to content
|
||||
message["content"].append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{message['base64_image']}"
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Remove the base64_image field
|
||||
del message["base64_image"]
|
||||
|
||||
# Only include messages with content or tool_calls
|
||||
if "content" in message or "tool_calls" in message:
|
||||
formatted_messages.append(message)
|
||||
|
||||
# Validate all roles
|
||||
invalid_roles = [
|
||||
msg for msg in formatted_messages if msg["role"] not in ROLE_VALUES
|
||||
]
|
||||
if invalid_roles:
|
||||
raise ValueError(f"Invalid role: {invalid_roles[0]['role']}")
|
||||
|
||||
return formatted_messages
|
||||
|
||||
@retry(
|
||||
wait=wait_random_exponential(min=1, max=60),
|
||||
stop=stop_after_attempt(6),
|
||||
retry=retry_if_exception_type(
|
||||
(OpenAIError, Exception, ValueError)
|
||||
), # Don't retry TokenLimitExceeded
|
||||
)
|
||||
async def ask(
|
||||
self,
|
||||
@ -111,6 +362,7 @@ class LLM:
|
||||
str: The generated response
|
||||
|
||||
Raises:
|
||||
TokenLimitExceeded: If token limits are exceeded
|
||||
ValueError: If messages are invalid or response is empty
|
||||
OpenAIError: If API call fails after retries
|
||||
Exception: For unexpected errors
|
||||
@ -123,27 +375,47 @@ class LLM:
|
||||
else:
|
||||
messages = self.format_messages(messages)
|
||||
|
||||
# Calculate input token count
|
||||
input_tokens = self.count_message_tokens(messages)
|
||||
|
||||
# Check if token limits are exceeded
|
||||
if not self.check_token_limit(input_tokens):
|
||||
error_message = self.get_limit_error_message(input_tokens)
|
||||
# Raise a special exception that won't be retried
|
||||
raise TokenLimitExceeded(error_message)
|
||||
|
||||
params = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
}
|
||||
|
||||
if self.model in REASONING_MODELS:
|
||||
params["max_completion_tokens"] = self.max_tokens
|
||||
else:
|
||||
params["max_tokens"] = self.max_tokens
|
||||
params["temperature"] = (
|
||||
temperature if temperature is not None else self.temperature
|
||||
)
|
||||
|
||||
if not stream:
|
||||
# Non-streaming request
|
||||
response = await self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
max_tokens=self.max_tokens,
|
||||
temperature=temperature or self.temperature,
|
||||
stream=False,
|
||||
)
|
||||
params["stream"] = False
|
||||
|
||||
response = await self.client.chat.completions.create(**params)
|
||||
|
||||
if not response.choices or not response.choices[0].message.content:
|
||||
raise ValueError("Empty or invalid response from LLM")
|
||||
|
||||
# Update token counts
|
||||
self.update_token_count(response.usage.prompt_tokens)
|
||||
|
||||
return response.choices[0].message.content
|
||||
|
||||
# Streaming request
|
||||
response = await self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
max_tokens=self.max_tokens,
|
||||
temperature=temperature or self.temperature,
|
||||
stream=True,
|
||||
)
|
||||
# Streaming request, For streaming, update estimated token count before making the request
|
||||
self.update_token_count(input_tokens)
|
||||
|
||||
params["stream"] = True
|
||||
response = await self.client.chat.completions.create(**params)
|
||||
|
||||
collected_messages = []
|
||||
async for chunk in response:
|
||||
@ -155,13 +427,23 @@ class LLM:
|
||||
full_response = "".join(collected_messages).strip()
|
||||
if not full_response:
|
||||
raise ValueError("Empty response from streaming LLM")
|
||||
|
||||
return full_response
|
||||
|
||||
except TokenLimitExceeded:
|
||||
# Re-raise token limit errors without logging
|
||||
raise
|
||||
except ValueError as ve:
|
||||
logger.error(f"Validation error: {ve}")
|
||||
raise
|
||||
except OpenAIError as oe:
|
||||
logger.error(f"OpenAI API error: {oe}")
|
||||
if isinstance(oe, AuthenticationError):
|
||||
logger.error("Authentication failed. Check API key.")
|
||||
elif isinstance(oe, RateLimitError):
|
||||
logger.error("Rate limit exceeded. Consider increasing retry attempts.")
|
||||
elif isinstance(oe, APIError):
|
||||
logger.error(f"API error: {oe}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in ask: {e}")
|
||||
@ -170,14 +452,163 @@ class LLM:
|
||||
@retry(
|
||||
wait=wait_random_exponential(min=1, max=60),
|
||||
stop=stop_after_attempt(6),
|
||||
retry=retry_if_exception_type(
|
||||
(OpenAIError, Exception, ValueError)
|
||||
), # Don't retry TokenLimitExceeded
|
||||
)
|
||||
async def ask_with_images(
|
||||
self,
|
||||
messages: List[Union[dict, Message]],
|
||||
images: List[Union[str, dict]],
|
||||
system_msgs: Optional[List[Union[dict, Message]]] = None,
|
||||
stream: bool = False,
|
||||
temperature: Optional[float] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Send a prompt with images to the LLM and get the response.
|
||||
|
||||
Args:
|
||||
messages: List of conversation messages
|
||||
images: List of image URLs or image data dictionaries
|
||||
system_msgs: Optional system messages to prepend
|
||||
stream (bool): Whether to stream the response
|
||||
temperature (float): Sampling temperature for the response
|
||||
|
||||
Returns:
|
||||
str: The generated response
|
||||
|
||||
Raises:
|
||||
TokenLimitExceeded: If token limits are exceeded
|
||||
ValueError: If messages are invalid or response is empty
|
||||
OpenAIError: If API call fails after retries
|
||||
Exception: For unexpected errors
|
||||
"""
|
||||
try:
|
||||
# Format messages
|
||||
formatted_messages = self.format_messages(messages)
|
||||
|
||||
# Ensure the last message is from the user to attach images
|
||||
if not formatted_messages or formatted_messages[-1]["role"] != "user":
|
||||
raise ValueError(
|
||||
"The last message must be from the user to attach images"
|
||||
)
|
||||
|
||||
# Process the last user message to include images
|
||||
last_message = formatted_messages[-1]
|
||||
|
||||
# Convert content to multimodal format if needed
|
||||
content = last_message["content"]
|
||||
multimodal_content = (
|
||||
[{"type": "text", "text": content}]
|
||||
if isinstance(content, str)
|
||||
else content
|
||||
if isinstance(content, list)
|
||||
else []
|
||||
)
|
||||
|
||||
# Add images to content
|
||||
for image in images:
|
||||
if isinstance(image, str):
|
||||
multimodal_content.append(
|
||||
{"type": "image_url", "image_url": {"url": image}}
|
||||
)
|
||||
elif isinstance(image, dict) and "url" in image:
|
||||
multimodal_content.append({"type": "image_url", "image_url": image})
|
||||
elif isinstance(image, dict) and "image_url" in image:
|
||||
multimodal_content.append(image)
|
||||
else:
|
||||
raise ValueError(f"Unsupported image format: {image}")
|
||||
|
||||
# Update the message with multimodal content
|
||||
last_message["content"] = multimodal_content
|
||||
|
||||
# Add system messages if provided
|
||||
if system_msgs:
|
||||
all_messages = self.format_messages(system_msgs) + formatted_messages
|
||||
else:
|
||||
all_messages = formatted_messages
|
||||
|
||||
# Calculate tokens and check limits
|
||||
input_tokens = self.count_message_tokens(all_messages)
|
||||
if not self.check_token_limit(input_tokens):
|
||||
raise TokenLimitExceeded(self.get_limit_error_message(input_tokens))
|
||||
|
||||
# Set up API parameters
|
||||
params = {
|
||||
"model": self.model,
|
||||
"messages": all_messages,
|
||||
"stream": stream,
|
||||
}
|
||||
|
||||
# Add model-specific parameters
|
||||
if self.model in REASONING_MODELS:
|
||||
params["max_completion_tokens"] = self.max_tokens
|
||||
else:
|
||||
params["max_tokens"] = self.max_tokens
|
||||
params["temperature"] = (
|
||||
temperature if temperature is not None else self.temperature
|
||||
)
|
||||
|
||||
# Handle non-streaming request
|
||||
if not stream:
|
||||
response = await self.client.chat.completions.create(**params)
|
||||
|
||||
if not response.choices or not response.choices[0].message.content:
|
||||
raise ValueError("Empty or invalid response from LLM")
|
||||
|
||||
self.update_token_count(response.usage.prompt_tokens)
|
||||
return response.choices[0].message.content
|
||||
|
||||
# Handle streaming request
|
||||
self.update_token_count(input_tokens)
|
||||
response = await self.client.chat.completions.create(**params)
|
||||
|
||||
collected_messages = []
|
||||
async for chunk in response:
|
||||
chunk_message = chunk.choices[0].delta.content or ""
|
||||
collected_messages.append(chunk_message)
|
||||
print(chunk_message, end="", flush=True)
|
||||
|
||||
print() # Newline after streaming
|
||||
full_response = "".join(collected_messages).strip()
|
||||
|
||||
if not full_response:
|
||||
raise ValueError("Empty response from streaming LLM")
|
||||
|
||||
return full_response
|
||||
|
||||
except TokenLimitExceeded:
|
||||
raise
|
||||
except ValueError as ve:
|
||||
logger.error(f"Validation error in ask_with_images: {ve}")
|
||||
raise
|
||||
except OpenAIError as oe:
|
||||
logger.error(f"OpenAI API error: {oe}")
|
||||
if isinstance(oe, AuthenticationError):
|
||||
logger.error("Authentication failed. Check API key.")
|
||||
elif isinstance(oe, RateLimitError):
|
||||
logger.error("Rate limit exceeded. Consider increasing retry attempts.")
|
||||
elif isinstance(oe, APIError):
|
||||
logger.error(f"API error: {oe}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in ask_with_images: {e}")
|
||||
raise
|
||||
|
||||
@retry(
|
||||
wait=wait_random_exponential(min=1, max=60),
|
||||
stop=stop_after_attempt(6),
|
||||
retry=retry_if_exception_type(
|
||||
(OpenAIError, Exception, ValueError)
|
||||
), # Don't retry TokenLimitExceeded
|
||||
)
|
||||
async def ask_tool(
|
||||
self,
|
||||
messages: List[Union[dict, Message]],
|
||||
system_msgs: Optional[List[Union[dict, Message]]] = None,
|
||||
timeout: int = 60,
|
||||
timeout: int = 300,
|
||||
tools: Optional[List[dict]] = None,
|
||||
tool_choice: Literal["none", "auto", "required"] = "auto",
|
||||
tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore
|
||||
temperature: Optional[float] = None,
|
||||
**kwargs,
|
||||
):
|
||||
@ -197,13 +628,14 @@ class LLM:
|
||||
ChatCompletionMessage: The model's response
|
||||
|
||||
Raises:
|
||||
TokenLimitExceeded: If token limits are exceeded
|
||||
ValueError: If tools, tool_choice, or messages are invalid
|
||||
OpenAIError: If API call fails after retries
|
||||
Exception: For unexpected errors
|
||||
"""
|
||||
try:
|
||||
# Validate tool_choice
|
||||
if tool_choice not in ["none", "auto", "required"]:
|
||||
if tool_choice not in TOOL_CHOICE_VALUES:
|
||||
raise ValueError(f"Invalid tool_choice: {tool_choice}")
|
||||
|
||||
# Format messages
|
||||
@ -213,6 +645,23 @@ class LLM:
|
||||
else:
|
||||
messages = self.format_messages(messages)
|
||||
|
||||
# Calculate input token count
|
||||
input_tokens = self.count_message_tokens(messages)
|
||||
|
||||
# If there are tools, calculate token count for tool descriptions
|
||||
tools_tokens = 0
|
||||
if tools:
|
||||
for tool in tools:
|
||||
tools_tokens += self.count_tokens(str(tool))
|
||||
|
||||
input_tokens += tools_tokens
|
||||
|
||||
# Check if token limits are exceeded
|
||||
if not self.check_token_limit(input_tokens):
|
||||
error_message = self.get_limit_error_message(input_tokens)
|
||||
# Raise a special exception that won't be retried
|
||||
raise TokenLimitExceeded(error_message)
|
||||
|
||||
# Validate tools if provided
|
||||
if tools:
|
||||
for tool in tools:
|
||||
@ -220,28 +669,43 @@ class LLM:
|
||||
raise ValueError("Each tool must be a dict with 'type' field")
|
||||
|
||||
# Set up the completion request
|
||||
response = await self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
temperature=temperature or self.temperature,
|
||||
max_tokens=self.max_tokens,
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
timeout=timeout,
|
||||
params = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
"tools": tools,
|
||||
"tool_choice": tool_choice,
|
||||
"timeout": timeout,
|
||||
**kwargs,
|
||||
)
|
||||
}
|
||||
|
||||
if self.model in REASONING_MODELS:
|
||||
params["max_completion_tokens"] = self.max_tokens
|
||||
else:
|
||||
params["max_tokens"] = self.max_tokens
|
||||
params["temperature"] = (
|
||||
temperature if temperature is not None else self.temperature
|
||||
)
|
||||
|
||||
response = await self.client.chat.completions.create(**params)
|
||||
|
||||
# Check if response is valid
|
||||
if not response.choices or not response.choices[0].message:
|
||||
print(response)
|
||||
raise ValueError("Invalid or empty response from LLM")
|
||||
|
||||
# Update token counts
|
||||
self.update_token_count(response.usage.prompt_tokens)
|
||||
|
||||
return response.choices[0].message
|
||||
|
||||
except TokenLimitExceeded:
|
||||
# Re-raise token limit errors without logging
|
||||
raise
|
||||
except ValueError as ve:
|
||||
logger.error(f"Validation error in ask_tool: {ve}")
|
||||
raise
|
||||
except OpenAIError as oe:
|
||||
logger.error(f"OpenAI API error: {oe}")
|
||||
if isinstance(oe, AuthenticationError):
|
||||
logger.error("Authentication failed. Check API key.")
|
||||
elif isinstance(oe, RateLimitError):
|
||||
|
@ -15,7 +15,7 @@ def define_log_level(print_level="INFO", logfile_level="DEBUG", name: str = None
|
||||
_print_level = print_level
|
||||
|
||||
current_date = datetime.now()
|
||||
formatted_date = current_date.strftime("%Y%m%d")
|
||||
formatted_date = current_date.strftime("%Y%m%d%H%M%S")
|
||||
log_name = (
|
||||
f"{name}_{formatted_date}" if name else formatted_date
|
||||
) # name a log with prefix name
|
||||
|
@ -1,14 +1,8 @@
|
||||
SYSTEM_PROMPT = "You are OpenManus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, or web browsing, you can handle it all."
|
||||
|
||||
NEXT_STEP_PROMPT = """You can interact with the computer using PythonExecute, save important content and information files through FileSaver, open browsers with BrowserUseTool, and retrieve information using GoogleSearch.
|
||||
|
||||
PythonExecute: Execute Python code to interact with the computer system, data processing, automation tasks, etc.
|
||||
|
||||
FileSaver: Save files locally, such as txt, py, html, etc.
|
||||
|
||||
BrowserUseTool: Open, browse, and use web browsers.If you open a local HTML file, you must provide the absolute path to the file.
|
||||
|
||||
GoogleSearch: Perform web information retrieval
|
||||
SYSTEM_PROMPT = (
|
||||
"You are OpenManus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, or web browsing, you can handle it all."
|
||||
"The initial directory is: {directory}"
|
||||
)
|
||||
|
||||
NEXT_STEP_PROMPT = """
|
||||
Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.
|
||||
"""
|
||||
|
@ -1,25 +1,27 @@
|
||||
PLANNING_SYSTEM_PROMPT = """
|
||||
You are an expert Planning Agent tasked with solving complex problems by creating and managing structured plans.
|
||||
You are an expert Planning Agent tasked with solving problems efficiently through structured plans.
|
||||
Your job is:
|
||||
1. Analyze requests to understand the task scope
|
||||
2. Create clear, actionable plans with the `planning` tool
|
||||
2. Create a clear, actionable plan that makes meaningful progress with the `planning` tool
|
||||
3. Execute steps using available tools as needed
|
||||
4. Track progress and adapt plans dynamically
|
||||
5. Use `finish` to conclude when the task is complete
|
||||
4. Track progress and adapt plans when necessary
|
||||
5. Use `finish` to conclude immediately when the task is complete
|
||||
|
||||
|
||||
Available tools will vary by task but may include:
|
||||
- `planning`: Create, update, and track plans (commands: create, update, mark_step, etc.)
|
||||
- `finish`: End the task when complete
|
||||
|
||||
Break tasks into logical, sequential steps. Think about dependencies and verification methods.
|
||||
Break tasks into logical steps with clear outcomes. Avoid excessive detail or sub-steps.
|
||||
Think about dependencies and verification methods.
|
||||
Know when to conclude - don't continue thinking once objectives are met.
|
||||
"""
|
||||
|
||||
NEXT_STEP_PROMPT = """
|
||||
Based on the current state, what's your next step?
|
||||
Consider:
|
||||
1. Do you need to create or refine a plan?
|
||||
2. Are you ready to execute a specific step?
|
||||
3. Have you completed the task?
|
||||
Based on the current state, what's your next action?
|
||||
Choose the most efficient path forward:
|
||||
1. Is the plan sufficient, or does it need refinement?
|
||||
2. Can you execute the next step immediately?
|
||||
3. Is the task complete? If so, use `finish` right away.
|
||||
|
||||
Provide reasoning, then select the appropriate tool or action.
|
||||
Be concise in your reasoning, then select the appropriate tool or action.
|
||||
"""
|
||||
|
@ -1,7 +1,7 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Optional, Protocol
|
||||
|
||||
from app.config import SandboxConfig
|
||||
from app.config import SandboxSettings
|
||||
from app.sandbox.core.sandbox import DockerSandbox
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ class BaseSandboxClient(ABC):
|
||||
@abstractmethod
|
||||
async def create(
|
||||
self,
|
||||
config: Optional[SandboxConfig] = None,
|
||||
config: Optional[SandboxSettings] = None,
|
||||
volume_bindings: Optional[Dict[str, str]] = None,
|
||||
) -> None:
|
||||
"""Creates sandbox."""
|
||||
@ -92,7 +92,7 @@ class LocalSandboxClient(BaseSandboxClient):
|
||||
|
||||
async def create(
|
||||
self,
|
||||
config: Optional[SandboxConfig] = None,
|
||||
config: Optional[SandboxSettings] = None,
|
||||
volume_bindings: Optional[Dict[str, str]] = None,
|
||||
) -> None:
|
||||
"""Creates a sandbox.
|
||||
|
@ -6,7 +6,7 @@ from typing import Dict, Optional, Set
|
||||
import docker
|
||||
from docker.errors import APIError, ImageNotFound
|
||||
|
||||
from app.config import SandboxConfig
|
||||
from app.config import SandboxSettings
|
||||
from app.logger import logger
|
||||
from app.sandbox.core.sandbox import DockerSandbox
|
||||
|
||||
@ -113,7 +113,7 @@ class SandboxManager:
|
||||
|
||||
async def create_sandbox(
|
||||
self,
|
||||
config: Optional[SandboxConfig] = None,
|
||||
config: Optional[SandboxSettings] = None,
|
||||
volume_bindings: Optional[Dict[str, str]] = None,
|
||||
) -> str:
|
||||
"""Creates a new sandbox instance.
|
||||
@ -134,7 +134,7 @@ class SandboxManager:
|
||||
f"Maximum number of sandboxes ({self.max_sandboxes}) reached"
|
||||
)
|
||||
|
||||
config = config or SandboxConfig()
|
||||
config = config or SandboxSettings()
|
||||
if not await self.ensure_image(config.image):
|
||||
raise RuntimeError(f"Failed to ensure Docker image: {config.image}")
|
||||
|
||||
|
@ -10,7 +10,7 @@ import docker
|
||||
from docker.errors import NotFound
|
||||
from docker.models.containers import Container
|
||||
|
||||
from app.config import SandboxConfig
|
||||
from app.config import SandboxSettings
|
||||
from app.sandbox.core.exceptions import SandboxTimeoutError
|
||||
from app.sandbox.core.terminal import AsyncDockerizedTerminal
|
||||
|
||||
@ -31,7 +31,7 @@ class DockerSandbox:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: Optional[SandboxConfig] = None,
|
||||
config: Optional[SandboxSettings] = None,
|
||||
volume_bindings: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""Initializes a sandbox instance.
|
||||
@ -40,7 +40,7 @@ class DockerSandbox:
|
||||
config: Sandbox configuration. Default configuration used if None.
|
||||
volume_bindings: Volume mappings in {host_path: container_path} format.
|
||||
"""
|
||||
self.config = config or SandboxConfig()
|
||||
self.config = config or SandboxSettings()
|
||||
self.volume_bindings = volume_bindings or {}
|
||||
self.client = docker.from_env()
|
||||
self.container: Optional[Container] = None
|
||||
|
@ -4,6 +4,31 @@ from typing import Any, List, Literal, Optional, Union
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Role(str, Enum):
|
||||
"""Message role options"""
|
||||
|
||||
SYSTEM = "system"
|
||||
USER = "user"
|
||||
ASSISTANT = "assistant"
|
||||
TOOL = "tool"
|
||||
|
||||
|
||||
ROLE_VALUES = tuple(role.value for role in Role)
|
||||
ROLE_TYPE = Literal[ROLE_VALUES] # type: ignore
|
||||
|
||||
|
||||
class ToolChoice(str, Enum):
|
||||
"""Tool choice options"""
|
||||
|
||||
NONE = "none"
|
||||
AUTO = "auto"
|
||||
REQUIRED = "required"
|
||||
|
||||
|
||||
TOOL_CHOICE_VALUES = tuple(choice.value for choice in ToolChoice)
|
||||
TOOL_CHOICE_TYPE = Literal[TOOL_CHOICE_VALUES] # type: ignore
|
||||
|
||||
|
||||
class AgentState(str, Enum):
|
||||
"""Agent execution states"""
|
||||
|
||||
@ -29,11 +54,12 @@ class ToolCall(BaseModel):
|
||||
class Message(BaseModel):
|
||||
"""Represents a chat message in the conversation"""
|
||||
|
||||
role: Literal["system", "user", "assistant", "tool"] = Field(...)
|
||||
role: ROLE_TYPE = Field(...) # type: ignore
|
||||
content: Optional[str] = Field(default=None)
|
||||
tool_calls: Optional[List[ToolCall]] = Field(default=None)
|
||||
name: Optional[str] = Field(default=None)
|
||||
tool_call_id: Optional[str] = Field(default=None)
|
||||
base64_image: Optional[str] = Field(default=None)
|
||||
|
||||
def __add__(self, other) -> List["Message"]:
|
||||
"""支持 Message + list 或 Message + Message 的操作"""
|
||||
@ -66,44 +92,67 @@ class Message(BaseModel):
|
||||
message["name"] = self.name
|
||||
if self.tool_call_id is not None:
|
||||
message["tool_call_id"] = self.tool_call_id
|
||||
if self.base64_image is not None:
|
||||
message["base64_image"] = self.base64_image
|
||||
return message
|
||||
|
||||
@classmethod
|
||||
def user_message(cls, content: str) -> "Message":
|
||||
def user_message(
|
||||
cls, content: str, base64_image: Optional[str] = None
|
||||
) -> "Message":
|
||||
"""Create a user message"""
|
||||
return cls(role="user", content=content)
|
||||
return cls(role=Role.USER, content=content, base64_image=base64_image)
|
||||
|
||||
@classmethod
|
||||
def system_message(cls, content: str) -> "Message":
|
||||
"""Create a system message"""
|
||||
return cls(role="system", content=content)
|
||||
return cls(role=Role.SYSTEM, content=content)
|
||||
|
||||
@classmethod
|
||||
def assistant_message(cls, content: Optional[str] = None) -> "Message":
|
||||
def assistant_message(
|
||||
cls, content: Optional[str] = None, base64_image: Optional[str] = None
|
||||
) -> "Message":
|
||||
"""Create an assistant message"""
|
||||
return cls(role="assistant", content=content)
|
||||
return cls(role=Role.ASSISTANT, content=content, base64_image=base64_image)
|
||||
|
||||
@classmethod
|
||||
def tool_message(cls, content: str, name, tool_call_id: str) -> "Message":
|
||||
def tool_message(
|
||||
cls, content: str, name, tool_call_id: str, base64_image: Optional[str] = None
|
||||
) -> "Message":
|
||||
"""Create a tool message"""
|
||||
return cls(role="tool", content=content, name=name, tool_call_id=tool_call_id)
|
||||
return cls(
|
||||
role=Role.TOOL,
|
||||
content=content,
|
||||
name=name,
|
||||
tool_call_id=tool_call_id,
|
||||
base64_image=base64_image,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_tool_calls(
|
||||
cls, tool_calls: List[Any], content: Union[str, List[str]] = "", **kwargs
|
||||
cls,
|
||||
tool_calls: List[Any],
|
||||
content: Union[str, List[str]] = "",
|
||||
base64_image: Optional[str] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Create ToolCallsMessage from raw tool calls.
|
||||
|
||||
Args:
|
||||
tool_calls: Raw tool calls from LLM
|
||||
content: Optional message content
|
||||
base64_image: Optional base64 encoded image
|
||||
"""
|
||||
formatted_calls = [
|
||||
{"id": call.id, "function": call.function.model_dump(), "type": "function"}
|
||||
for call in tool_calls
|
||||
]
|
||||
return cls(
|
||||
role="assistant", content=content, tool_calls=formatted_calls, **kwargs
|
||||
role=Role.ASSISTANT,
|
||||
content=content,
|
||||
tool_calls=formatted_calls,
|
||||
base64_image=base64_image,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
|
@ -37,6 +37,7 @@ class ToolResult(BaseModel):
|
||||
|
||||
output: Any = Field(default=None)
|
||||
error: Optional[str] = Field(default=None)
|
||||
base64_image: Optional[str] = Field(default=None)
|
||||
system: Optional[str] = Field(default=None)
|
||||
|
||||
class Config:
|
||||
@ -58,6 +59,7 @@ class ToolResult(BaseModel):
|
||||
return ToolResult(
|
||||
output=combine_fields(self.output, other.output),
|
||||
error=combine_fields(self.error, other.error),
|
||||
base64_image=combine_fields(self.base64_image, other.base64_image, False),
|
||||
system=combine_fields(self.system, other.system),
|
||||
)
|
||||
|
||||
@ -76,7 +78,3 @@ class CLIResult(ToolResult):
|
||||
|
||||
class ToolFailure(ToolResult):
|
||||
"""A ToolResult that represents a failure."""
|
||||
|
||||
|
||||
class AgentAwareTool:
|
||||
agent: Optional = None
|
||||
|
@ -1,35 +1,54 @@
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Optional
|
||||
from typing import Generic, Optional, TypeVar
|
||||
|
||||
from browser_use import Browser as BrowserUseBrowser
|
||||
from browser_use import BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.dom.service import DomService
|
||||
from pydantic import Field, field_validator
|
||||
from pydantic_core.core_schema import ValidationInfo
|
||||
|
||||
from app.config import config
|
||||
from app.llm import LLM
|
||||
from app.tool.base import BaseTool, ToolResult
|
||||
from app.tool.web_search import WebSearch
|
||||
|
||||
|
||||
_BROWSER_DESCRIPTION = """
|
||||
Interact with a web browser to perform various actions such as navigation, element interaction,
|
||||
content extraction, and tab management. Supported actions include:
|
||||
- 'navigate': Go to a specific URL
|
||||
- 'click': Click an element by index
|
||||
- 'input_text': Input text into an element
|
||||
- 'screenshot': Capture a screenshot
|
||||
- 'get_html': Get page HTML content
|
||||
- 'execute_js': Execute JavaScript code
|
||||
- 'scroll': Scroll the page
|
||||
- 'switch_tab': Switch to a specific tab
|
||||
- 'new_tab': Open a new tab
|
||||
- 'close_tab': Close the current tab
|
||||
Interact with a web browser to perform various actions such as navigation, element interaction, content extraction, and tab management. This tool provides a comprehensive set of browser automation capabilities:
|
||||
|
||||
Navigation:
|
||||
- 'go_to_url': Go to a specific URL in the current tab
|
||||
- 'go_back': Go back
|
||||
- 'refresh': Refresh the current page
|
||||
- 'web_search': Search the query in the current tab, the query should be a search query like humans search in web, concrete and not vague or super long. More the single most important items.
|
||||
|
||||
Element Interaction:
|
||||
- 'click_element': Click an element by index
|
||||
- 'input_text': Input text into a form element
|
||||
- 'scroll_down'/'scroll_up': Scroll the page (with optional pixel amount)
|
||||
- 'scroll_to_text': If you dont find something which you want to interact with, scroll to it
|
||||
- 'send_keys': Send strings of special keys like Escape,Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press.
|
||||
- 'get_dropdown_options': Get all options from a dropdown
|
||||
- 'select_dropdown_option': Select dropdown option for interactive element index by the text of the option you want to select
|
||||
|
||||
Content Extraction:
|
||||
- 'extract_content': Extract page content to retrieve specific information from the page, e.g. all company names, a specifc description, all information about, links with companies in structured format or simply links
|
||||
|
||||
Tab Management:
|
||||
- 'switch_tab': Switch to a specific tab
|
||||
- 'open_tab': Open a new tab with a URL
|
||||
- 'close_tab': Close the current tab
|
||||
|
||||
Utility:
|
||||
- 'wait': Wait for a specified number of seconds
|
||||
"""
|
||||
|
||||
Context = TypeVar("Context")
|
||||
|
||||
class BrowserUseTool(BaseTool):
|
||||
|
||||
class BrowserUseTool(BaseTool, Generic[Context]):
|
||||
name: str = "browser_use"
|
||||
description: str = _BROWSER_DESCRIPTION
|
||||
parameters: dict = {
|
||||
@ -38,51 +57,79 @@ class BrowserUseTool(BaseTool):
|
||||
"action": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"navigate",
|
||||
"click",
|
||||
"go_to_url",
|
||||
"click_element",
|
||||
"input_text",
|
||||
"screenshot",
|
||||
"get_html",
|
||||
"execute_js",
|
||||
"scroll",
|
||||
"scroll_down",
|
||||
"scroll_up",
|
||||
"scroll_to_text",
|
||||
"send_keys",
|
||||
"get_dropdown_options",
|
||||
"select_dropdown_option",
|
||||
"go_back",
|
||||
"web_search",
|
||||
"wait",
|
||||
"extract_content",
|
||||
"switch_tab",
|
||||
"new_tab",
|
||||
"open_tab",
|
||||
"close_tab",
|
||||
"refresh",
|
||||
],
|
||||
"description": "The browser action to perform",
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL for 'navigate' or 'new_tab' actions",
|
||||
"description": "URL for 'go_to_url' or 'open_tab' actions",
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "Element index for 'click' or 'input_text' actions",
|
||||
"description": "Element index for 'click_element', 'input_text', 'get_dropdown_options', or 'select_dropdown_option' actions",
|
||||
},
|
||||
"text": {"type": "string", "description": "Text for 'input_text' action"},
|
||||
"script": {
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "JavaScript code for 'execute_js' action",
|
||||
"description": "Text for 'input_text', 'scroll_to_text', or 'select_dropdown_option' actions",
|
||||
},
|
||||
"scroll_amount": {
|
||||
"type": "integer",
|
||||
"description": "Pixels to scroll (positive for down, negative for up) for 'scroll' action",
|
||||
"description": "Pixels to scroll (positive for down, negative for up) for 'scroll_down' or 'scroll_up' actions",
|
||||
},
|
||||
"tab_id": {
|
||||
"type": "integer",
|
||||
"description": "Tab ID for 'switch_tab' action",
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query for 'web_search' action",
|
||||
},
|
||||
"goal": {
|
||||
"type": "string",
|
||||
"description": "Extraction goal for 'extract_content' action",
|
||||
},
|
||||
"keys": {
|
||||
"type": "string",
|
||||
"description": "Keys to send for 'send_keys' action",
|
||||
},
|
||||
"seconds": {
|
||||
"type": "integer",
|
||||
"description": "Seconds to wait for 'wait' action",
|
||||
},
|
||||
},
|
||||
"required": ["action"],
|
||||
"dependencies": {
|
||||
"navigate": ["url"],
|
||||
"click": ["index"],
|
||||
"go_to_url": ["url"],
|
||||
"click_element": ["index"],
|
||||
"input_text": ["index", "text"],
|
||||
"execute_js": ["script"],
|
||||
"switch_tab": ["tab_id"],
|
||||
"new_tab": ["url"],
|
||||
"scroll": ["scroll_amount"],
|
||||
"open_tab": ["url"],
|
||||
"scroll_down": ["scroll_amount"],
|
||||
"scroll_up": ["scroll_amount"],
|
||||
"scroll_to_text": ["text"],
|
||||
"send_keys": ["keys"],
|
||||
"get_dropdown_options": ["index"],
|
||||
"select_dropdown_option": ["index", "text"],
|
||||
"go_back": [],
|
||||
"web_search": ["query"],
|
||||
"wait": ["seconds"],
|
||||
"extract_content": ["goal"],
|
||||
},
|
||||
}
|
||||
|
||||
@ -90,6 +137,12 @@ class BrowserUseTool(BaseTool):
|
||||
browser: Optional[BrowserUseBrowser] = Field(default=None, exclude=True)
|
||||
context: Optional[BrowserContext] = Field(default=None, exclude=True)
|
||||
dom_service: Optional[DomService] = Field(default=None, exclude=True)
|
||||
web_search_tool: WebSearch = Field(default_factory=WebSearch, exclude=True)
|
||||
|
||||
# Context for generic functionality
|
||||
tool_context: Optional[Context] = Field(default=None, exclude=True)
|
||||
|
||||
llm: Optional[LLM] = Field(default_factory=LLM)
|
||||
|
||||
@field_validator("parameters", mode="before")
|
||||
def validate_parameters(cls, v: dict, info: ValidationInfo) -> dict:
|
||||
@ -100,10 +153,50 @@ class BrowserUseTool(BaseTool):
|
||||
async def _ensure_browser_initialized(self) -> BrowserContext:
|
||||
"""Ensure browser and context are initialized."""
|
||||
if self.browser is None:
|
||||
self.browser = BrowserUseBrowser(BrowserConfig(headless=False))
|
||||
browser_config_kwargs = {"headless": False, "disable_security": True}
|
||||
|
||||
if config.browser_config:
|
||||
from browser_use.browser.browser import ProxySettings
|
||||
|
||||
# handle proxy settings.
|
||||
if config.browser_config.proxy and config.browser_config.proxy.server:
|
||||
browser_config_kwargs["proxy"] = ProxySettings(
|
||||
server=config.browser_config.proxy.server,
|
||||
username=config.browser_config.proxy.username,
|
||||
password=config.browser_config.proxy.password,
|
||||
)
|
||||
|
||||
browser_attrs = [
|
||||
"headless",
|
||||
"disable_security",
|
||||
"extra_chromium_args",
|
||||
"chrome_instance_path",
|
||||
"wss_url",
|
||||
"cdp_url",
|
||||
]
|
||||
|
||||
for attr in browser_attrs:
|
||||
value = getattr(config.browser_config, attr, None)
|
||||
if value is not None:
|
||||
if not isinstance(value, list) or value:
|
||||
browser_config_kwargs[attr] = value
|
||||
|
||||
self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs))
|
||||
|
||||
if self.context is None:
|
||||
self.context = await self.browser.new_context()
|
||||
context_config = BrowserContextConfig()
|
||||
|
||||
# if there is context config in the config, use it.
|
||||
if (
|
||||
config.browser_config
|
||||
and hasattr(config.browser_config, "new_context_config")
|
||||
and config.browser_config.new_context_config
|
||||
):
|
||||
context_config = config.browser_config.new_context_config
|
||||
|
||||
self.context = await self.browser.new_context(context_config)
|
||||
self.dom_service = DomService(await self.context.get_current_page())
|
||||
|
||||
return self.context
|
||||
|
||||
async def execute(
|
||||
@ -112,9 +205,12 @@ class BrowserUseTool(BaseTool):
|
||||
url: Optional[str] = None,
|
||||
index: Optional[int] = None,
|
||||
text: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
scroll_amount: Optional[int] = None,
|
||||
tab_id: Optional[int] = None,
|
||||
query: Optional[str] = None,
|
||||
goal: Optional[str] = None,
|
||||
keys: Optional[str] = None,
|
||||
seconds: Optional[int] = None,
|
||||
**kwargs,
|
||||
) -> ToolResult:
|
||||
"""
|
||||
@ -124,10 +220,13 @@ class BrowserUseTool(BaseTool):
|
||||
action: The browser action to perform
|
||||
url: URL for navigation or new tab
|
||||
index: Element index for click or input actions
|
||||
text: Text for input action
|
||||
script: JavaScript code for execution
|
||||
text: Text for input action or search query
|
||||
scroll_amount: Pixels to scroll for scroll action
|
||||
tab_id: Tab ID for switch_tab action
|
||||
query: Search query for Google search
|
||||
goal: Extraction goal for content extraction
|
||||
keys: Keys to send for keyboard actions
|
||||
seconds: Seconds to wait
|
||||
**kwargs: Additional arguments
|
||||
|
||||
Returns:
|
||||
@ -137,15 +236,68 @@ class BrowserUseTool(BaseTool):
|
||||
try:
|
||||
context = await self._ensure_browser_initialized()
|
||||
|
||||
if action == "navigate":
|
||||
# Get max content length from config
|
||||
max_content_length = getattr(
|
||||
config.browser_config, "max_content_length", 2000
|
||||
)
|
||||
|
||||
# Navigation actions
|
||||
if action == "go_to_url":
|
||||
if not url:
|
||||
return ToolResult(error="URL is required for 'navigate' action")
|
||||
await context.navigate_to(url)
|
||||
return ToolResult(
|
||||
error="URL is required for 'go_to_url' action"
|
||||
)
|
||||
page = await context.get_current_page()
|
||||
await page.goto(url)
|
||||
await page.wait_for_load_state()
|
||||
return ToolResult(output=f"Navigated to {url}")
|
||||
|
||||
elif action == "click":
|
||||
elif action == "go_back":
|
||||
await context.go_back()
|
||||
return ToolResult(output="Navigated back")
|
||||
|
||||
elif action == "refresh":
|
||||
await context.refresh_page()
|
||||
return ToolResult(output="Refreshed current page")
|
||||
|
||||
elif action == "web_search":
|
||||
if not query:
|
||||
return ToolResult(
|
||||
error="Query is required for 'web_search' action"
|
||||
)
|
||||
search_results = await self.web_search_tool.execute(query)
|
||||
|
||||
if search_results:
|
||||
# Navigate to the first search result
|
||||
first_result = search_results[0]
|
||||
if isinstance(first_result, dict) and "url" in first_result:
|
||||
url_to_navigate = first_result["url"]
|
||||
elif isinstance(first_result, str):
|
||||
url_to_navigate = first_result
|
||||
else:
|
||||
return ToolResult(
|
||||
error=f"Invalid search result format: {first_result}"
|
||||
)
|
||||
|
||||
page = await context.get_current_page()
|
||||
await page.goto(url_to_navigate)
|
||||
await page.wait_for_load_state()
|
||||
|
||||
return ToolResult(
|
||||
output=f"Searched for '{query}' and navigated to first result: {url_to_navigate}\nAll results:"
|
||||
+ "\n".join([str(r) for r in search_results])
|
||||
)
|
||||
else:
|
||||
return ToolResult(
|
||||
error=f"No search results found for '{query}'"
|
||||
)
|
||||
|
||||
# Element interaction actions
|
||||
elif action == "click_element":
|
||||
if index is None:
|
||||
return ToolResult(error="Index is required for 'click' action")
|
||||
return ToolResult(
|
||||
error="Index is required for 'click_element' action"
|
||||
)
|
||||
element = await context.get_dom_element_by_index(index)
|
||||
if not element:
|
||||
return ToolResult(error=f"Element with index {index} not found")
|
||||
@ -168,60 +320,174 @@ class BrowserUseTool(BaseTool):
|
||||
output=f"Input '{text}' into element at index {index}"
|
||||
)
|
||||
|
||||
elif action == "screenshot":
|
||||
screenshot = await context.take_screenshot(full_page=True)
|
||||
return ToolResult(
|
||||
output=f"Screenshot captured (base64 length: {len(screenshot)})",
|
||||
system=screenshot,
|
||||
elif action == "scroll_down" or action == "scroll_up":
|
||||
direction = 1 if action == "scroll_down" else -1
|
||||
amount = (
|
||||
scroll_amount
|
||||
if scroll_amount is not None
|
||||
else context.config.browser_window_size["height"]
|
||||
)
|
||||
|
||||
elif action == "get_html":
|
||||
html = await context.get_page_html()
|
||||
truncated = html[:2000] + "..." if len(html) > 2000 else html
|
||||
return ToolResult(output=truncated)
|
||||
|
||||
elif action == "execute_js":
|
||||
if not script:
|
||||
return ToolResult(
|
||||
error="Script is required for 'execute_js' action"
|
||||
)
|
||||
result = await context.execute_javascript(script)
|
||||
return ToolResult(output=str(result))
|
||||
|
||||
elif action == "scroll":
|
||||
if scroll_amount is None:
|
||||
return ToolResult(
|
||||
error="Scroll amount is required for 'scroll' action"
|
||||
)
|
||||
await context.execute_javascript(
|
||||
f"window.scrollBy(0, {scroll_amount});"
|
||||
f"window.scrollBy(0, {direction * amount});"
|
||||
)
|
||||
direction = "down" if scroll_amount > 0 else "up"
|
||||
return ToolResult(
|
||||
output=f"Scrolled {direction} by {abs(scroll_amount)} pixels"
|
||||
output=f"Scrolled {'down' if direction > 0 else 'up'} by {amount} pixels"
|
||||
)
|
||||
|
||||
elif action == "scroll_to_text":
|
||||
if not text:
|
||||
return ToolResult(
|
||||
error="Text is required for 'scroll_to_text' action"
|
||||
)
|
||||
page = await context.get_current_page()
|
||||
try:
|
||||
locator = page.get_by_text(text, exact=False)
|
||||
await locator.scroll_into_view_if_needed()
|
||||
return ToolResult(output=f"Scrolled to text: '{text}'")
|
||||
except Exception as e:
|
||||
return ToolResult(error=f"Failed to scroll to text: {str(e)}")
|
||||
|
||||
elif action == "send_keys":
|
||||
if not keys:
|
||||
return ToolResult(
|
||||
error="Keys are required for 'send_keys' action"
|
||||
)
|
||||
page = await context.get_current_page()
|
||||
await page.keyboard.press(keys)
|
||||
return ToolResult(output=f"Sent keys: {keys}")
|
||||
|
||||
elif action == "get_dropdown_options":
|
||||
if index is None:
|
||||
return ToolResult(
|
||||
error="Index is required for 'get_dropdown_options' action"
|
||||
)
|
||||
element = await context.get_dom_element_by_index(index)
|
||||
if not element:
|
||||
return ToolResult(error=f"Element with index {index} not found")
|
||||
page = await context.get_current_page()
|
||||
options = await page.evaluate(
|
||||
"""
|
||||
(xpath) => {
|
||||
const select = document.evaluate(xpath, document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
||||
if (!select) return null;
|
||||
return Array.from(select.options).map(opt => ({
|
||||
text: opt.text,
|
||||
value: opt.value,
|
||||
index: opt.index
|
||||
}));
|
||||
}
|
||||
""",
|
||||
element.xpath,
|
||||
)
|
||||
return ToolResult(output=f"Dropdown options: {options}")
|
||||
|
||||
elif action == "select_dropdown_option":
|
||||
if index is None or not text:
|
||||
return ToolResult(
|
||||
error="Index and text are required for 'select_dropdown_option' action"
|
||||
)
|
||||
element = await context.get_dom_element_by_index(index)
|
||||
if not element:
|
||||
return ToolResult(error=f"Element with index {index} not found")
|
||||
page = await context.get_current_page()
|
||||
await page.select_option(element.xpath, label=text)
|
||||
return ToolResult(
|
||||
output=f"Selected option '{text}' from dropdown at index {index}"
|
||||
)
|
||||
|
||||
# Content extraction actions
|
||||
elif action == "extract_content":
|
||||
if not goal:
|
||||
return ToolResult(
|
||||
error="Goal is required for 'extract_content' action"
|
||||
)
|
||||
page = await context.get_current_page()
|
||||
try:
|
||||
# Get page content and convert to markdown for better processing
|
||||
html_content = await page.content()
|
||||
|
||||
# Import markdownify here to avoid global import
|
||||
try:
|
||||
import markdownify
|
||||
|
||||
content = markdownify.markdownify(html_content)
|
||||
except ImportError:
|
||||
# Fallback if markdownify is not available
|
||||
content = html_content
|
||||
|
||||
# Create prompt for LLM
|
||||
prompt_text = """
|
||||
Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page.
|
||||
|
||||
Examples of extraction goals:
|
||||
- Extract all company names
|
||||
- Extract specific descriptions
|
||||
- Extract all information about a topic
|
||||
- Extract links with companies in structured format
|
||||
- Extract all links
|
||||
|
||||
If the goal is vague, summarize the page. Respond in JSON format.
|
||||
|
||||
Extraction goal: {goal}
|
||||
|
||||
Page content:
|
||||
{page}
|
||||
"""
|
||||
# Format the prompt with the goal and content
|
||||
max_content_length = min(50000, len(content))
|
||||
formatted_prompt = prompt_text.format(
|
||||
goal=goal, page=content[:max_content_length]
|
||||
)
|
||||
|
||||
# Create a proper message list for the LLM
|
||||
from app.schema import Message
|
||||
|
||||
messages = [Message.user_message(formatted_prompt)]
|
||||
|
||||
# Use LLM to extract content based on the goal
|
||||
response = await self.llm.ask(messages)
|
||||
|
||||
msg = f"Extracted from page:\n{response}\n"
|
||||
return ToolResult(output=msg)
|
||||
except Exception as e:
|
||||
# Provide a more helpful error message
|
||||
error_msg = f"Failed to extract content: {str(e)}"
|
||||
try:
|
||||
# Try to return a portion of the page content as fallback
|
||||
return ToolResult(
|
||||
output=f"{error_msg}\nHere's a portion of the page content:\n{content[:2000]}..."
|
||||
)
|
||||
except:
|
||||
# If all else fails, just return the error
|
||||
return ToolResult(error=error_msg)
|
||||
|
||||
# Tab management actions
|
||||
elif action == "switch_tab":
|
||||
if tab_id is None:
|
||||
return ToolResult(
|
||||
error="Tab ID is required for 'switch_tab' action"
|
||||
)
|
||||
await context.switch_to_tab(tab_id)
|
||||
page = await context.get_current_page()
|
||||
await page.wait_for_load_state()
|
||||
return ToolResult(output=f"Switched to tab {tab_id}")
|
||||
|
||||
elif action == "new_tab":
|
||||
elif action == "open_tab":
|
||||
if not url:
|
||||
return ToolResult(error="URL is required for 'new_tab' action")
|
||||
return ToolResult(error="URL is required for 'open_tab' action")
|
||||
await context.create_new_tab(url)
|
||||
return ToolResult(output=f"Opened new tab with URL {url}")
|
||||
return ToolResult(output=f"Opened new tab with {url}")
|
||||
|
||||
elif action == "close_tab":
|
||||
await context.close_current_tab()
|
||||
return ToolResult(output="Closed current tab")
|
||||
|
||||
elif action == "refresh":
|
||||
await context.refresh_page()
|
||||
return ToolResult(output="Refreshed current page")
|
||||
# Utility actions
|
||||
elif action == "wait":
|
||||
seconds_to_wait = seconds if seconds is not None else 3
|
||||
await asyncio.sleep(seconds_to_wait)
|
||||
return ToolResult(output=f"Waited for {seconds_to_wait} seconds")
|
||||
|
||||
else:
|
||||
return ToolResult(error=f"Unknown action: {action}")
|
||||
@ -229,21 +495,58 @@ class BrowserUseTool(BaseTool):
|
||||
except Exception as e:
|
||||
return ToolResult(error=f"Browser action '{action}' failed: {str(e)}")
|
||||
|
||||
async def get_current_state(self) -> ToolResult:
|
||||
"""Get the current browser state as a ToolResult."""
|
||||
async with self.lock:
|
||||
try:
|
||||
context = await self._ensure_browser_initialized()
|
||||
state = await context.get_state()
|
||||
state_info = {
|
||||
"url": state.url,
|
||||
"title": state.title,
|
||||
"tabs": [tab.model_dump() for tab in state.tabs],
|
||||
"interactive_elements": state.element_tree.clickable_elements_to_string(),
|
||||
}
|
||||
return ToolResult(output=json.dumps(state_info))
|
||||
except Exception as e:
|
||||
return ToolResult(error=f"Failed to get browser state: {str(e)}")
|
||||
async def get_current_state(
|
||||
self, context: Optional[BrowserContext] = None
|
||||
) -> ToolResult:
|
||||
"""
|
||||
Get the current browser state as a ToolResult.
|
||||
If context is not provided, uses self.context.
|
||||
"""
|
||||
try:
|
||||
# Use provided context or fall back to self.context
|
||||
ctx = context or self.context
|
||||
if not ctx:
|
||||
return ToolResult(error="Browser context not initialized")
|
||||
|
||||
state = await ctx.get_state()
|
||||
|
||||
# Create a viewport_info dictionary if it doesn't exist
|
||||
viewport_height = 0
|
||||
if hasattr(state, "viewport_info") and state.viewport_info:
|
||||
viewport_height = state.viewport_info.height
|
||||
elif hasattr(ctx, "config") and hasattr(ctx.config, "browser_window_size"):
|
||||
viewport_height = ctx.config.browser_window_size.get("height", 0)
|
||||
|
||||
# Take a screenshot for the state
|
||||
screenshot = await ctx.take_screenshot(full_page=True)
|
||||
|
||||
# Build the state info with all required fields
|
||||
state_info = {
|
||||
"url": state.url,
|
||||
"title": state.title,
|
||||
"tabs": [tab.model_dump() for tab in state.tabs],
|
||||
"help": "[0], [1], [2], etc., represent clickable indices corresponding to the elements listed. Clicking on these indices will navigate to or interact with the respective content behind them.",
|
||||
"interactive_elements": (
|
||||
state.element_tree.clickable_elements_to_string()
|
||||
if state.element_tree
|
||||
else ""
|
||||
),
|
||||
"scroll_info": {
|
||||
"pixels_above": getattr(state, "pixels_above", 0),
|
||||
"pixels_below": getattr(state, "pixels_below", 0),
|
||||
"total_height": getattr(state, "pixels_above", 0)
|
||||
+ getattr(state, "pixels_below", 0)
|
||||
+ viewport_height,
|
||||
},
|
||||
"viewport_height": viewport_height,
|
||||
}
|
||||
|
||||
return ToolResult(
|
||||
output=json.dumps(state_info, indent=4, ensure_ascii=False),
|
||||
base64_image=screenshot,
|
||||
)
|
||||
except Exception as e:
|
||||
return ToolResult(error=f"Failed to get browser state: {str(e)}")
|
||||
|
||||
async def cleanup(self):
|
||||
"""Clean up browser resources."""
|
||||
@ -265,3 +568,10 @@ class BrowserUseTool(BaseTool):
|
||||
loop = asyncio.new_event_loop()
|
||||
loop.run_until_complete(self.cleanup())
|
||||
loop.close()
|
||||
|
||||
@classmethod
|
||||
def create_with_context(cls, context: Context) -> "BrowserUseTool[Context]":
|
||||
"""Factory method to create a BrowserUseTool with a specific context."""
|
||||
tool = cls()
|
||||
tool.tool_context = context
|
||||
return tool
|
||||
|
@ -4,7 +4,7 @@ import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Optional, Protocol, Tuple, Union, runtime_checkable
|
||||
|
||||
from app.config import SandboxConfig
|
||||
from app.config import SandboxSettings
|
||||
from app.exceptions import ToolError
|
||||
from app.sandbox.client import SANDBOX_CLIENT
|
||||
|
||||
@ -100,7 +100,7 @@ class SandboxFileOperator(FileOperator):
|
||||
async def _ensure_sandbox_initialized(self):
|
||||
"""Ensure sandbox is initialized."""
|
||||
if not self.sandbox_client.sandbox:
|
||||
await self.sandbox_client.create(config=SandboxConfig())
|
||||
await self.sandbox_client.create(config=SandboxSettings())
|
||||
|
||||
async def read_file(self, path: PathLike) -> str:
|
||||
"""Read content from a file in sandbox."""
|
||||
|
@ -1,8 +1,8 @@
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiofiles
|
||||
|
||||
from app.config import WORKSPACE_ROOT
|
||||
from app.tool.base import BaseTool
|
||||
|
||||
|
||||
@ -46,16 +46,22 @@ The tool accepts content and a file path, and saves the content to that location
|
||||
str: A message indicating the result of the operation.
|
||||
"""
|
||||
try:
|
||||
# Place the generated file in the workspace directory
|
||||
if os.path.isabs(file_path):
|
||||
file_name = os.path.basename(file_path)
|
||||
full_path = os.path.join(WORKSPACE_ROOT, file_name)
|
||||
else:
|
||||
full_path = os.path.join(WORKSPACE_ROOT, file_path)
|
||||
|
||||
# Ensure the directory exists
|
||||
directory = os.path.dirname(file_path)
|
||||
directory = os.path.dirname(full_path)
|
||||
if directory and not os.path.exists(directory):
|
||||
os.makedirs(directory)
|
||||
|
||||
# Write directly to the file
|
||||
async with aiofiles.open(file_path, mode, encoding="utf-8") as file:
|
||||
async with aiofiles.open(full_path, mode, encoding="utf-8") as file:
|
||||
await file.write(content)
|
||||
|
||||
return f"Content successfully saved to {file_path}"
|
||||
return f"Content successfully saved to {full_path}"
|
||||
except Exception as e:
|
||||
return f"Error saving file: {str(e)}"
|
||||
|
||||
|
@ -1,48 +0,0 @@
|
||||
import asyncio
|
||||
from typing import List
|
||||
|
||||
from googlesearch import search
|
||||
|
||||
from app.tool.base import BaseTool
|
||||
|
||||
|
||||
class GoogleSearch(BaseTool):
|
||||
name: str = "google_search"
|
||||
description: str = """Perform a Google search and return a list of relevant links.
|
||||
Use this tool when you need to find information on the web, get up-to-date data, or research specific topics.
|
||||
The tool returns a list of URLs that match the search query.
|
||||
"""
|
||||
parameters: dict = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "(required) The search query to submit to Google.",
|
||||
},
|
||||
"num_results": {
|
||||
"type": "integer",
|
||||
"description": "(optional) The number of search results to return. Default is 10.",
|
||||
"default": 10,
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
|
||||
async def execute(self, query: str, num_results: int = 10) -> List[str]:
|
||||
"""
|
||||
Execute a Google search and return a list of URLs.
|
||||
|
||||
Args:
|
||||
query (str): The search query to submit to Google.
|
||||
num_results (int, optional): The number of search results to return. Default is 10.
|
||||
|
||||
Returns:
|
||||
List[str]: A list of URLs matching the search query.
|
||||
"""
|
||||
# Run the search in a thread pool to prevent blocking
|
||||
loop = asyncio.get_event_loop()
|
||||
links = await loop.run_in_executor(
|
||||
None, lambda: list(search(query, num_results=num_results))
|
||||
)
|
||||
|
||||
return links
|
@ -1,4 +1,6 @@
|
||||
import threading
|
||||
import multiprocessing
|
||||
import sys
|
||||
from io import StringIO
|
||||
from typing import Dict
|
||||
|
||||
from app.tool.base import BaseTool
|
||||
@ -20,6 +22,20 @@ class PythonExecute(BaseTool):
|
||||
"required": ["code"],
|
||||
}
|
||||
|
||||
def _run_code(self, code: str, result_dict: dict, safe_globals: dict) -> None:
|
||||
original_stdout = sys.stdout
|
||||
try:
|
||||
output_buffer = StringIO()
|
||||
sys.stdout = output_buffer
|
||||
exec(code, safe_globals, safe_globals)
|
||||
result_dict["observation"] = output_buffer.getvalue()
|
||||
result_dict["success"] = True
|
||||
except Exception as e:
|
||||
result_dict["observation"] = str(e)
|
||||
result_dict["success"] = False
|
||||
finally:
|
||||
sys.stdout = original_stdout
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
code: str,
|
||||
@ -35,36 +51,25 @@ class PythonExecute(BaseTool):
|
||||
Returns:
|
||||
Dict: Contains 'output' with execution output or error message and 'success' status.
|
||||
"""
|
||||
result = {"observation": ""}
|
||||
|
||||
def run_code():
|
||||
try:
|
||||
safe_globals = {"__builtins__": dict(__builtins__)}
|
||||
with multiprocessing.Manager() as manager:
|
||||
result = manager.dict({"observation": "", "success": False})
|
||||
if isinstance(__builtins__, dict):
|
||||
safe_globals = {"__builtins__": __builtins__}
|
||||
else:
|
||||
safe_globals = {"__builtins__": __builtins__.__dict__.copy()}
|
||||
proc = multiprocessing.Process(
|
||||
target=self._run_code, args=(code, result, safe_globals)
|
||||
)
|
||||
proc.start()
|
||||
proc.join(timeout)
|
||||
|
||||
import sys
|
||||
from io import StringIO
|
||||
|
||||
output_buffer = StringIO()
|
||||
sys.stdout = output_buffer
|
||||
|
||||
exec(code, safe_globals, {})
|
||||
|
||||
sys.stdout = sys.__stdout__
|
||||
|
||||
result["observation"] = output_buffer.getvalue()
|
||||
|
||||
except Exception as e:
|
||||
result["observation"] = str(e)
|
||||
result["success"] = False
|
||||
|
||||
thread = threading.Thread(target=run_code)
|
||||
thread.start()
|
||||
thread.join(timeout)
|
||||
|
||||
if thread.is_alive():
|
||||
return {
|
||||
"observation": f"Execution timeout after {timeout} seconds",
|
||||
"success": False,
|
||||
}
|
||||
|
||||
return result
|
||||
# timeout process
|
||||
if proc.is_alive():
|
||||
proc.terminate()
|
||||
proc.join(1)
|
||||
return {
|
||||
"observation": f"Execution timeout after {timeout} seconds",
|
||||
"success": False,
|
||||
}
|
||||
return dict(result)
|
||||
|
12
app/tool/search/__init__.py
Normal file
12
app/tool/search/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
from app.tool.search.baidu_search import BaiduSearchEngine
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
|
||||
from app.tool.search.google_search import GoogleSearchEngine
|
||||
|
||||
|
||||
__all__ = [
|
||||
"WebSearchEngine",
|
||||
"BaiduSearchEngine",
|
||||
"DuckDuckGoSearchEngine",
|
||||
"GoogleSearchEngine",
|
||||
]
|
9
app/tool/search/baidu_search.py
Normal file
9
app/tool/search/baidu_search.py
Normal file
@ -0,0 +1,9 @@
|
||||
from baidusearch.baidusearch import search
|
||||
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
|
||||
|
||||
class BaiduSearchEngine(WebSearchEngine):
|
||||
def perform_search(self, query, num_results=10, *args, **kwargs):
|
||||
"""Baidu search engine."""
|
||||
return search(query, num_results=num_results)
|
17
app/tool/search/base.py
Normal file
17
app/tool/search/base.py
Normal file
@ -0,0 +1,17 @@
|
||||
class WebSearchEngine(object):
|
||||
def perform_search(
|
||||
self, query: str, num_results: int = 10, *args, **kwargs
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Perform a web search and return a list of URLs.
|
||||
|
||||
Args:
|
||||
query (str): The search query to submit to the search engine.
|
||||
num_results (int, optional): The number of search results to return. Default is 10.
|
||||
args: Additional arguments.
|
||||
kwargs: Additional keyword arguments.
|
||||
|
||||
Returns:
|
||||
List: A list of dict matching the search query.
|
||||
"""
|
||||
raise NotImplementedError
|
9
app/tool/search/duckduckgo_search.py
Normal file
9
app/tool/search/duckduckgo_search.py
Normal file
@ -0,0 +1,9 @@
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
|
||||
|
||||
class DuckDuckGoSearchEngine(WebSearchEngine):
|
||||
async def perform_search(self, query, num_results=10, *args, **kwargs):
|
||||
"""DuckDuckGo search engine."""
|
||||
return DDGS.text(query, num_results=num_results)
|
9
app/tool/search/google_search.py
Normal file
9
app/tool/search/google_search.py
Normal file
@ -0,0 +1,9 @@
|
||||
from googlesearch import search
|
||||
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
|
||||
|
||||
class GoogleSearchEngine(WebSearchEngine):
|
||||
def perform_search(self, query, num_results=10, *args, **kwargs):
|
||||
"""Google search engine."""
|
||||
return search(query, num_results=num_results)
|
@ -1,6 +1,7 @@
|
||||
"""File and directory manipulation tool with sandbox support."""
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, DefaultDict, List, Literal, Optional, get_args
|
||||
|
||||
from app.config import config
|
||||
@ -163,17 +164,15 @@ class StrReplaceEditor(BaseTool):
|
||||
|
||||
return str(result)
|
||||
|
||||
# <<<<<<< HEAD
|
||||
async def validate_path(
|
||||
self, command: str, path: str, operator: FileOperator
|
||||
self, command: str, path: Path, operator: FileOperator
|
||||
) -> None:
|
||||
"""Validate path and command combination based on execution environment."""
|
||||
# Check if path is absolute
|
||||
if not path.startswith("/"):
|
||||
suggested_path = f"/{path}"
|
||||
raise ToolError(
|
||||
f"The path {path} is not an absolute path, it should start with `/`. "
|
||||
f"Maybe you meant {suggested_path}?"
|
||||
)
|
||||
if not path.is_absolute():
|
||||
# suggested_path = f"/{path}"
|
||||
raise ToolError(f"The path {path} is not an absolute path")
|
||||
|
||||
# Only check if path exists for non-create commands
|
||||
if command != "create":
|
||||
@ -185,6 +184,27 @@ class StrReplaceEditor(BaseTool):
|
||||
# Check if path is a directory
|
||||
is_dir = await operator.is_directory(path)
|
||||
if is_dir and command != "view":
|
||||
# =======
|
||||
# def validate_path(self, command: str, path: Path):
|
||||
# """
|
||||
# Check that the path/command combination is valid.
|
||||
# """
|
||||
# # Check if its an absolute path
|
||||
# if not path.is_absolute():
|
||||
# raise ToolError(f"The path {path} is not an absolute path")
|
||||
# # Check if path exists
|
||||
# if not path.exists() and command != "create":
|
||||
# raise ToolError(
|
||||
# f"The path {path} does not exist. Please provide a valid path."
|
||||
# )
|
||||
# if path.exists() and command == "create":
|
||||
# raise ToolError(
|
||||
# f"File already exists at: {path}. Cannot overwrite files using command `create`."
|
||||
# )
|
||||
# # Check if the path points to a directory
|
||||
# if path.is_dir():
|
||||
# if command != "view":
|
||||
# >>>>>>> upstream/main
|
||||
raise ToolError(
|
||||
f"The path {path} is a directory and only the `view` command can be used on directories"
|
||||
)
|
||||
|
182
app/tool/terminal.py
Normal file
182
app/tool/terminal.py
Normal file
@ -0,0 +1,182 @@
|
||||
import asyncio
|
||||
import os
|
||||
import shlex
|
||||
from typing import Optional
|
||||
|
||||
from app.tool.base import BaseTool, CLIResult
|
||||
|
||||
|
||||
class Terminal(BaseTool):
|
||||
name: str = "execute_command"
|
||||
description: str = """Request to execute a CLI command on the system.
|
||||
Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task.
|
||||
You must tailor your command to the user's system and provide a clear explanation of what the command does.
|
||||
Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run.
|
||||
Commands will be executed in the current working directory.
|
||||
Note: You MUST append a `sleep 0.05` to the end of the command for commands that will complete in under 50ms, as this will circumvent a known issue with the terminal tool where it will sometimes not return the output when the command completes too quickly.
|
||||
"""
|
||||
parameters: dict = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "string",
|
||||
"description": "(required) The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.",
|
||||
}
|
||||
},
|
||||
"required": ["command"],
|
||||
}
|
||||
process: Optional[asyncio.subprocess.Process] = None
|
||||
current_path: str = os.getcwd()
|
||||
lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
async def execute(self, command: str) -> CLIResult:
|
||||
"""
|
||||
Execute a terminal command asynchronously with persistent context.
|
||||
|
||||
Args:
|
||||
command (str): The terminal command to execute.
|
||||
|
||||
Returns:
|
||||
str: The output, and error of the command execution.
|
||||
"""
|
||||
# Split the command by & to handle multiple commands
|
||||
commands = [cmd.strip() for cmd in command.split("&") if cmd.strip()]
|
||||
final_output = CLIResult(output="", error="")
|
||||
|
||||
for cmd in commands:
|
||||
sanitized_command = self._sanitize_command(cmd)
|
||||
|
||||
# Handle 'cd' command internally
|
||||
if sanitized_command.lstrip().startswith("cd "):
|
||||
result = await self._handle_cd_command(sanitized_command)
|
||||
else:
|
||||
async with self.lock:
|
||||
try:
|
||||
self.process = await asyncio.create_subprocess_shell(
|
||||
sanitized_command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=self.current_path,
|
||||
)
|
||||
stdout, stderr = await self.process.communicate()
|
||||
result = CLIResult(
|
||||
output=stdout.decode().strip(),
|
||||
error=stderr.decode().strip(),
|
||||
)
|
||||
except Exception as e:
|
||||
result = CLIResult(output="", error=str(e))
|
||||
finally:
|
||||
self.process = None
|
||||
|
||||
# Combine outputs
|
||||
if result.output:
|
||||
final_output.output += (
|
||||
(result.output + "\n") if final_output.output else result.output
|
||||
)
|
||||
if result.error:
|
||||
final_output.error += (
|
||||
(result.error + "\n") if final_output.error else result.error
|
||||
)
|
||||
|
||||
# Remove trailing newlines
|
||||
final_output.output = final_output.output.rstrip()
|
||||
final_output.error = final_output.error.rstrip()
|
||||
return final_output
|
||||
|
||||
async def execute_in_env(self, env_name: str, command: str) -> CLIResult:
|
||||
"""
|
||||
Execute a terminal command asynchronously within a specified Conda environment.
|
||||
|
||||
Args:
|
||||
env_name (str): The name of the Conda environment.
|
||||
command (str): The terminal command to execute within the environment.
|
||||
|
||||
Returns:
|
||||
str: The output, and error of the command execution.
|
||||
"""
|
||||
sanitized_command = self._sanitize_command(command)
|
||||
|
||||
# Construct the command to run within the Conda environment
|
||||
# Using 'conda run -n env_name command' to execute without activating
|
||||
conda_command = f"conda run -n {shlex.quote(env_name)} {sanitized_command}"
|
||||
|
||||
return await self.execute(conda_command)
|
||||
|
||||
async def _handle_cd_command(self, command: str) -> CLIResult:
|
||||
"""
|
||||
Handle 'cd' commands to change the current path.
|
||||
|
||||
Args:
|
||||
command (str): The 'cd' command to process.
|
||||
|
||||
Returns:
|
||||
TerminalOutput: The result of the 'cd' command.
|
||||
"""
|
||||
try:
|
||||
parts = shlex.split(command)
|
||||
if len(parts) < 2:
|
||||
new_path = os.path.expanduser("~")
|
||||
else:
|
||||
new_path = os.path.expanduser(parts[1])
|
||||
|
||||
# Handle relative paths
|
||||
if not os.path.isabs(new_path):
|
||||
new_path = os.path.join(self.current_path, new_path)
|
||||
|
||||
new_path = os.path.abspath(new_path)
|
||||
|
||||
if os.path.isdir(new_path):
|
||||
self.current_path = new_path
|
||||
return CLIResult(
|
||||
output=f"Changed directory to {self.current_path}", error=""
|
||||
)
|
||||
else:
|
||||
return CLIResult(output="", error=f"No such directory: {new_path}")
|
||||
except Exception as e:
|
||||
return CLIResult(output="", error=str(e))
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_command(command: str) -> str:
|
||||
"""
|
||||
Sanitize the command for safe execution.
|
||||
|
||||
Args:
|
||||
command (str): The command to sanitize.
|
||||
|
||||
Returns:
|
||||
str: The sanitized command.
|
||||
"""
|
||||
# Example sanitization: restrict certain dangerous commands
|
||||
dangerous_commands = ["rm", "sudo", "shutdown", "reboot"]
|
||||
try:
|
||||
parts = shlex.split(command)
|
||||
if any(cmd in dangerous_commands for cmd in parts):
|
||||
raise ValueError("Use of dangerous commands is restricted.")
|
||||
except Exception:
|
||||
# If shlex.split fails, try basic string comparison
|
||||
if any(cmd in command for cmd in dangerous_commands):
|
||||
raise ValueError("Use of dangerous commands is restricted.")
|
||||
|
||||
# Additional sanitization logic can be added here
|
||||
return command
|
||||
|
||||
async def close(self):
|
||||
"""Close the persistent shell process if it exists."""
|
||||
async with self.lock:
|
||||
if self.process:
|
||||
self.process.terminate()
|
||||
try:
|
||||
await asyncio.wait_for(self.process.wait(), timeout=5)
|
||||
except asyncio.TimeoutError:
|
||||
self.process.kill()
|
||||
await self.process.wait()
|
||||
finally:
|
||||
self.process = None
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Enter the asynchronous context manager."""
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Exit the asynchronous context manager and close the process."""
|
||||
await self.close()
|
@ -1,7 +1,8 @@
|
||||
from app.tool.base import BaseTool
|
||||
|
||||
|
||||
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task."""
|
||||
_TERMINATE_DESCRIPTION = """Terminate the interaction when the request is met OR if the assistant cannot proceed further with the task.
|
||||
When you have finished all the tasks, call this tool to end the work."""
|
||||
|
||||
|
||||
class Terminate(BaseTool):
|
||||
|
99
app/tool/web_search.py
Normal file
99
app/tool/web_search.py
Normal file
@ -0,0 +1,99 @@
|
||||
import asyncio
|
||||
from typing import List
|
||||
|
||||
from tenacity import retry, stop_after_attempt, wait_exponential
|
||||
|
||||
from app.config import config
|
||||
from app.tool.base import BaseTool
|
||||
from app.tool.search import (
|
||||
BaiduSearchEngine,
|
||||
DuckDuckGoSearchEngine,
|
||||
GoogleSearchEngine,
|
||||
WebSearchEngine,
|
||||
)
|
||||
|
||||
|
||||
class WebSearch(BaseTool):
    """Tool that queries a web search engine and returns result links.

    The preferred engine (taken from configuration, defaulting to
    Google) is tried first; on failure or empty results the remaining
    registered engines are tried in turn.
    """

    name: str = "web_search"
    description: str = """Perform a web search and return a list of relevant links.
This function attempts to use the primary search engine API to get up-to-date results.
If an error occurs, it falls back to an alternative search engine."""
    parameters: dict = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "(required) The search query to submit to the search engine.",
            },
            "num_results": {
                "type": "integer",
                "description": "(optional) The number of search results to return. Default is 10.",
                "default": 10,
            },
        },
        "required": ["query"],
    }
    # Registry of available engines; keys double as configuration names.
    _search_engine: dict[str, WebSearchEngine] = {
        "google": GoogleSearchEngine(),
        "baidu": BaiduSearchEngine(),
        "duckduckgo": DuckDuckGoSearchEngine(),
    }

    async def execute(self, query: str, num_results: int = 10) -> List[str]:
        """
        Execute a Web search and return a list of URLs.

        Args:
            query (str): The search query to submit to the search engine.
            num_results (int, optional): The number of search results to return. Default is 10.

        Returns:
            List[str]: A list of URLs matching the search query; empty if
            every engine fails or yields no results.
        """
        engine_order = self._get_engine_order()
        for engine_name in engine_order:
            engine = self._search_engine[engine_name]
            try:
                links = await self._perform_search_with_engine(
                    engine, query, num_results
                )
                if links:
                    return links
            except Exception as e:
                # Best-effort fallback: report and move on to the next engine.
                print(f"Search engine '{engine_name}' failed with error: {e}")
        return []

    def _get_engine_order(self) -> List[str]:
        """
        Determines the order in which to try search engines.
        Preferred engine is first (based on configuration), followed by the remaining engines.

        Returns:
            List[str]: Ordered list of search engine names.
        """
        preferred = "google"
        if config.search_config and config.search_config.engine:
            preferred = config.search_config.engine.lower()

        engine_order = []
        if preferred in self._search_engine:
            engine_order.append(preferred)
        # Append the rest in registry order, skipping the preferred one.
        for key in self._search_engine:
            if key not in engine_order:
                engine_order.append(key)
        return engine_order

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
    )
    async def _perform_search_with_engine(
        self,
        engine: WebSearchEngine,
        query: str,
        num_results: int,
    ) -> List[str]:
        """Run one engine's blocking search in the default executor.

        Retries up to three times with exponential backoff on failure
        (via tenacity).

        Args:
            engine: The engine to query.
            query: Search query string.
            num_results: Maximum number of results requested.

        Returns:
            List[str]: URLs returned by the engine.
        """
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() is deprecated in this context and may create
        # a new, unrelated event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: list(engine.perform_search(query, num_results=num_results))
        )
|
Binary file not shown.
Before Width: | Height: | Size: 167 KiB After Width: | Height: | Size: 166 KiB |
BIN
assets/logo.jpg
Normal file
BIN
assets/logo.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 64 KiB |
2
config/.gitignore
vendored
Normal file
2
config/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
# prevent the local config file from being uploaded to the remote repository
|
||||
config.toml
|
@ -1,23 +1,77 @@
|
||||
# Global LLM configuration
|
||||
[llm]
|
||||
model = "claude-3-5-sonnet"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..."
|
||||
max_tokens = 4096
|
||||
temperature = 0.0
|
||||
model = "claude-3-7-sonnet-20250219" # The LLM model to use
|
||||
base_url = "https://api.anthropic.com/v1/" # API endpoint URL
|
||||
api_key = "YOUR_API_KEY" # Your API key
|
||||
max_tokens = 8192 # Maximum number of tokens in the response
|
||||
temperature = 0.0 # Controls randomness
|
||||
|
||||
# [llm] #AZURE OPENAI:
|
||||
# api_type= 'azure'
|
||||
# model = "YOUR_MODEL_NAME" #"gpt-4o-mini"
|
||||
# base_url = "{YOUR_AZURE_ENDPOINT.rstrip('/')}/openai/deployments/{AZURE_DEPLOYMENT_ID}"
|
||||
# api_key = "AZURE API KEY"
|
||||
# max_tokens = 8096
|
||||
# temperature = 0.0
|
||||
# api_version="AZURE API VERSION" #"2024-08-01-preview"
|
||||
|
||||
# [llm] #OLLAMA:
|
||||
# api_type = 'ollama'
|
||||
# model = "llama3.2"
|
||||
# base_url = "http://localhost:11434/v1"
|
||||
# api_key = "ollama"
|
||||
# max_tokens = 4096
|
||||
# temperature = 0.0
|
||||
|
||||
# Optional configuration for specific LLM models
|
||||
[llm.vision]
|
||||
model = "claude-3-5-sonnet"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
api_key = "sk-..."
|
||||
model = "claude-3-7-sonnet-20250219" # The vision model to use
|
||||
base_url = "https://api.anthropic.com/v1/" # API endpoint URL for vision model
|
||||
api_key = "YOUR_API_KEY" # Your API key for vision model
|
||||
max_tokens = 8192 # Maximum number of tokens in the response
|
||||
temperature = 0.0 # Controls randomness for vision model
|
||||
|
||||
# Sandbox configuration
|
||||
[sandbox]
|
||||
use_sandbox = false
|
||||
image = "python:3.10-slim"
|
||||
work_dir = "/workspace"
|
||||
memory_limit = "1g" # 512m
|
||||
cpu_limit = 2.0
|
||||
timeout = 300
|
||||
network_enabled = false
|
||||
# [llm.vision] #OLLAMA VISION:
|
||||
# api_type = 'ollama'
|
||||
# model = "llama3.2-vision"
|
||||
# base_url = "http://localhost:11434/v1"
|
||||
# api_key = "ollama"
|
||||
# max_tokens = 4096
|
||||
# temperature = 0.0
|
||||
|
||||
# Optional configuration for specific browser configuration
|
||||
# [browser]
|
||||
# Whether to run browser in headless mode (default: false)
|
||||
#headless = false
|
||||
# Disable browser security features (default: true)
|
||||
#disable_security = true
|
||||
# Extra arguments to pass to the browser
|
||||
#extra_chromium_args = []
|
||||
# Path to a Chrome instance to use to connect to your normal browser
|
||||
# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
|
||||
#chrome_instance_path = ""
|
||||
# Connect to a browser instance via WebSocket
|
||||
#wss_url = ""
|
||||
# Connect to a browser instance via CDP
|
||||
#cdp_url = ""
|
||||
|
||||
# Optional configuration, Proxy settings for the browser
|
||||
# [browser.proxy]
|
||||
# server = "http://proxy-server:port"
|
||||
# username = "proxy-username"
|
||||
# password = "proxy-password"
|
||||
|
||||
# Optional configuration, Search settings.
|
||||
# [search]
|
||||
# Search engine for agent to use. Default is "Google", can be set to "Baidu" or "DuckDuckGo".
|
||||
#engine = "Google"
|
||||
|
||||
## Sandbox configuration
|
||||
#[sandbox]
|
||||
#use_sandbox = false
|
||||
#image = "python:3.10-slim"
|
||||
#work_dir = "/workspace"
|
||||
#memory_limit = "1g" # 512m
|
||||
#cpu_limit = 2.0
|
||||
#timeout = 300
|
||||
#network_enabled = true
|
||||
|
@ -0,0 +1,62 @@
|
||||
JAPAN TRAVEL HANDBOOK - GUIDE TO VERSIONS
|
||||
|
||||
Location: D:/OpenManus/
|
||||
|
||||
1. DETAILED DIGITAL VERSION
|
||||
File: japan_travel_handbook.html
|
||||
Best for: Desktop/laptop viewing
|
||||
Features:
|
||||
- Complete comprehensive guide
|
||||
- Detailed itinerary
|
||||
- Full proposal planning section
|
||||
- All hotel recommendations
|
||||
- Comprehensive budget breakdown
|
||||
Usage: Open in web browser for trip planning and detailed reference
|
||||
|
||||
2. PRINT-FRIENDLY VERSION
|
||||
File: japan_travel_handbook_print.html
|
||||
Best for: Physical reference during travel
|
||||
Features:
|
||||
- Condensed essential information
|
||||
- Optimized for paper printing
|
||||
- Clear, printer-friendly formatting
|
||||
- Quick reference tables
|
||||
Usage: Print and keep in travel documents folder
|
||||
|
||||
3. MOBILE-OPTIMIZED VERSION
|
||||
File: japan_travel_handbook_mobile.html
|
||||
Best for: On-the-go reference during trip
|
||||
Features:
|
||||
- Touch-friendly interface
|
||||
- Collapsible sections
|
||||
- Quick access emergency buttons
|
||||
- Dark mode support
|
||||
- Responsive design
|
||||
Usage: Save to phone's browser bookmarks for quick access
|
||||
|
||||
RECOMMENDED SETUP:
|
||||
1. Before Trip:
|
||||
- Use detailed version for planning
|
||||
- Print the print-friendly version
|
||||
- Save mobile version to phone
|
||||
|
||||
2. During Trip:
|
||||
- Keep printed version with travel documents
|
||||
- Use mobile version for daily reference
|
||||
- Access detailed version when needed for specific information
|
||||
|
||||
3. Emergency Access:
|
||||
- Mobile version has quick-access emergency information
|
||||
- Keep printed version as backup
|
||||
- All emergency numbers and contacts in both versions
|
||||
|
||||
Note: All versions contain the same core information but are formatted differently for optimal use in different situations.
|
||||
|
||||
IMPORTANT DATES:
|
||||
- Trip Duration: April 15-23, 2024
|
||||
- Proposal Day: April 19, 2024
|
||||
- Key Reservation Deadlines:
|
||||
* Flights: Book by January 2024
|
||||
* Hotels: Book by February 2024
|
||||
* Restaurant Reservations: Book by January 2024
|
||||
* JR Pass: Purchase by March 2024
|
124
examples/japan-travel-plan/japan_travel_handbook.html
Normal file
124
examples/japan-travel-plan/japan_travel_handbook.html
Normal file
@ -0,0 +1,124 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Japan Travel Handbook - April 15-23, 2024</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; line-height: 1.6; margin: 0; padding: 20px; }
|
||||
.container { max-width: 1000px; margin: 0 auto; }
|
||||
h1, h2, h3 { color: #333; }
|
||||
.day-item { background: #f9f9f9; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.important-note { background: #ffe6e6; padding: 10px; border-radius: 5px; }
|
||||
.phrase-table { width: 100%; border-collapse: collapse; }
|
||||
.phrase-table td, .phrase-table th { border: 1px solid #ddd; padding: 8px; }
|
||||
.proposal-spot { background: #e6ffe6; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.flight-info { background: #e6f3ff; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.checklist { background: #fff3e6; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.hotels { background: #e6e6ff; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.proposal-plan { background: #ffe6ff; padding: 15px; margin: 10px 0; border-radius: 5px; }
|
||||
.checkbox-list li { list-style-type: none; margin-bottom: 8px; }
|
||||
.checkbox-list li:before { content: "☐ "; }
|
||||
.warning { color: #ff4444; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
[Previous content remains the same...]
|
||||
|
||||
<div class="proposal-plan">
|
||||
<h2>🌸 Proposal Planning Guide 🌸</h2>
|
||||
|
||||
<h3>Ring Security & Transport</h3>
|
||||
<ul>
|
||||
<li><strong>Carrying the Ring:</strong>
|
||||
<ul>
|
||||
<li>Always keep the ring in your carry-on luggage, never in checked bags</li>
|
||||
<li>Use a discrete, non-branded box or case</li>
|
||||
<li>Consider travel insurance that covers jewelry</li>
|
||||
<li>Keep receipt/appraisal documentation separate from the ring</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>Airport Security Tips:</strong>
|
||||
<ul>
|
||||
<li>No need to declare the ring unless value exceeds ¥1,000,000 (~$6,700)</li>
|
||||
<li>If asked, simply state it's "personal jewelry"</li>
|
||||
<li>Consider requesting private screening to maintain surprise</li>
|
||||
<li>Keep ring in original box until through security, then transfer to more discrete case</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Proposal Location Details - Maruyama Park</h3>
|
||||
<ul>
|
||||
<li><strong>Best Timing:</strong>
|
||||
<ul>
|
||||
<li>Date: April 19 (Day 5)</li>
|
||||
<li>Time: 5:30 PM (30 minutes before sunset)</li>
|
||||
<li>Park closes at 8:00 PM in April</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>Specific Spot Recommendations:</strong>
|
||||
<ul>
|
||||
<li>Primary Location: Near the famous weeping cherry tree
|
||||
<br>- Less crowded in early evening
|
||||
<br>- Beautiful illumination starts at dusk
|
||||
<br>- Iconic Kyoto backdrop
|
||||
</li>
|
||||
<li>Backup Location: Gion Shirakawa area
|
||||
<br>- Atmospheric stone-paved street
|
||||
<br>- Traditional buildings and cherry trees
|
||||
<br>- Beautiful in light rain
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Proposal Day Planning</h3>
|
||||
<ul>
|
||||
<li><strong>Morning Preparation:</strong>
|
||||
<ul>
|
||||
<li>Confirm weather forecast</li>
|
||||
<li>Transfer ring to secure pocket/bag</li>
|
||||
<li>Have backup indoor location details ready</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>Suggested Timeline:</strong>
|
||||
<ul>
|
||||
<li>4:00 PM: Start heading to Maruyama Park area</li>
|
||||
<li>4:30 PM: Light refreshments at nearby tea house</li>
|
||||
<li>5:15 PM: Begin walk through park</li>
|
||||
<li>5:30 PM: Arrive at proposal spot</li>
|
||||
<li>6:00 PM: Sunset and illumination begins</li>
|
||||
<li>7:00 PM: Celebratory dinner reservation</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Celebration Dinner Options</h3>
|
||||
<ul>
|
||||
<li><strong>Traditional Japanese:</strong> Kikunoi Roan
|
||||
<br>- Intimate 2-star Michelin restaurant
|
||||
<br>- Advance reservation required (3 months)
|
||||
<br>- Price: ¥15,000-20,000 per person
|
||||
</li>
|
||||
<li><strong>Modern Fusion:</strong> The Sodoh
|
||||
<br>- Beautiful garden views
|
||||
<br>- Western-style seating available
|
||||
<br>- Price: ¥12,000-15,000 per person
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<div class="warning">
|
||||
<h3>Important Notes:</h3>
|
||||
<ul>
|
||||
<li>Keep proposal plans in separate notes from shared itinerary</li>
|
||||
<li>Have a backup plan in case of rain (indoor locations listed above)</li>
|
||||
<li>Consider hiring a local photographer to capture the moment</li>
|
||||
<li>Save restaurant staff contact info in case of timing changes</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
255
examples/japan-travel-plan/japan_travel_handbook_mobile.html
Normal file
255
examples/japan-travel-plan/japan_travel_handbook_mobile.html
Normal file
@ -0,0 +1,255 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
||||
<title>Japan Travel Guide (Mobile)</title>
|
||||
<style>
|
||||
* { box-sizing: border-box; }
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
|
||||
margin: 0;
|
||||
padding: 10px;
|
||||
line-height: 1.6;
|
||||
font-size: 16px;
|
||||
}
|
||||
.container {
|
||||
max-width: 100%;
|
||||
margin: 0 auto;
|
||||
}
|
||||
h1 { font-size: 1.5em; margin: 10px 0; }
|
||||
h2 { font-size: 1.3em; margin: 8px 0; }
|
||||
h3 { font-size: 1.1em; margin: 6px 0; }
|
||||
|
||||
/* Mobile-friendly cards */
|
||||
.card {
|
||||
background: #fff;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
||||
margin: 10px 0;
|
||||
padding: 15px;
|
||||
}
|
||||
|
||||
/* Collapsible sections */
|
||||
.collapsible {
|
||||
background: #f8f9fa;
|
||||
border: none;
|
||||
border-radius: 8px;
|
||||
width: 100%;
|
||||
padding: 15px;
|
||||
text-align: left;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
cursor: pointer;
|
||||
margin: 5px 0;
|
||||
}
|
||||
|
||||
.content {
|
||||
display: none;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.active {
|
||||
background: #e9ecef;
|
||||
}
|
||||
|
||||
/* Mobile-friendly tables */
|
||||
.table-wrapper {
|
||||
overflow-x: auto;
|
||||
margin: 10px 0;
|
||||
}
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
min-width: 300px;
|
||||
}
|
||||
th, td {
|
||||
padding: 10px;
|
||||
border: 1px solid #ddd;
|
||||
text-align: left;
|
||||
}
|
||||
th {
|
||||
background: #f8f9fa;
|
||||
}
|
||||
|
||||
/* Touch-friendly lists */
|
||||
ul, ol {
|
||||
padding-left: 20px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
li {
|
||||
margin: 8px 0;
|
||||
padding: 5px 0;
|
||||
}
|
||||
|
||||
/* Emergency info styling */
|
||||
.emergency {
|
||||
background: #ffe6e6;
|
||||
border-left: 4px solid #ff4444;
|
||||
padding: 10px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
|
||||
/* Quick access buttons */
|
||||
.quick-access {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 10px;
|
||||
margin: 10px 0;
|
||||
}
|
||||
.quick-btn {
|
||||
background: #007bff;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 20px;
|
||||
padding: 10px 20px;
|
||||
font-size: 0.9em;
|
||||
cursor: pointer;
|
||||
flex: 1 1 auto;
|
||||
text-align: center;
|
||||
min-width: 120px;
|
||||
}
|
||||
|
||||
/* Dark mode support */
|
||||
@media (prefers-color-scheme: dark) {
|
||||
body {
|
||||
background: #1a1a1a;
|
||||
color: #fff;
|
||||
}
|
||||
.card {
|
||||
background: #2d2d2d;
|
||||
}
|
||||
.collapsible {
|
||||
background: #333;
|
||||
color: #fff;
|
||||
}
|
||||
.active {
|
||||
background: #404040;
|
||||
}
|
||||
th {
|
||||
background: #333;
|
||||
}
|
||||
td, th {
|
||||
border-color: #404040;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>Japan Travel Guide</h1>
|
||||
<p><strong>April 15-23, 2024</strong></p>
|
||||
|
||||
<div class="quick-access">
|
||||
<button class="quick-btn" onclick="showSection('emergency')">Emergency</button>
|
||||
<button class="quick-btn" onclick="showSection('phrases')">Phrases</button>
|
||||
<button class="quick-btn" onclick="showSection('transport')">Transport</button>
|
||||
<button class="quick-btn" onclick="showSection('proposal')">Proposal</button>
|
||||
</div>
|
||||
|
||||
<div class="emergency card" id="emergency">
|
||||
<h2>Emergency Contacts</h2>
|
||||
<ul>
|
||||
<li>🚑 Emergency: 119</li>
|
||||
<li>👮 Police: 110</li>
|
||||
<li>🏢 US Embassy: +81-3-3224-5000</li>
|
||||
<li>ℹ️ Tourist Info: 03-3201-3331</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<button class="collapsible">📅 Daily Itinerary</button>
|
||||
<div class="content">
|
||||
<div class="table-wrapper">
|
||||
<table>
|
||||
<tr><th>Date</th><th>Location</th><th>Activities</th></tr>
|
||||
<tr><td>Apr 15</td><td>Tokyo</td><td>Arrival, Shinjuku</td></tr>
|
||||
<tr><td>Apr 16</td><td>Tokyo</td><td>Meiji, Harajuku, Senso-ji</td></tr>
|
||||
<tr><td>Apr 17</td><td>Tokyo</td><td>Tea Ceremony, Budokan</td></tr>
|
||||
<tr><td>Apr 18</td><td>Kyoto</td><td>Travel, Kinkaku-ji</td></tr>
|
||||
<tr><td>Apr 19</td><td>Kyoto</td><td>Fushimi Inari, Proposal</td></tr>
|
||||
<tr><td>Apr 20</td><td>Nara</td><td>Deer Park, Temples</td></tr>
|
||||
<tr><td>Apr 21</td><td>Tokyo</td><td>Return, Bay Cruise</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="collapsible">🗣️ Essential Phrases</button>
|
||||
<div class="content">
|
||||
<div class="table-wrapper">
|
||||
<table>
|
||||
<tr><th>English</th><th>Japanese</th></tr>
|
||||
<tr><td>Thank you</td><td>ありがとう</td></tr>
|
||||
<tr><td>Excuse me</td><td>すみません</td></tr>
|
||||
<tr><td>Please</td><td>お願いします</td></tr>
|
||||
<tr><td>Where is...</td><td>...はどこですか</td></tr>
|
||||
<tr><td>Help!</td><td>助けて!</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="collapsible">🚅 Transportation</button>
|
||||
<div class="content">
|
||||
<div class="card">
|
||||
<h3>Key Routes</h3>
|
||||
<ul>
|
||||
<li>Tokyo-Kyoto: 2h15m</li>
|
||||
<li>Kyoto-Nara: 45m</li>
|
||||
<li>Last trains: ~midnight</li>
|
||||
</ul>
|
||||
<p><strong>JR Pass:</strong> Activate April 15</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="collapsible">💍 Proposal Plan</button>
|
||||
<div class="content">
|
||||
<div class="card">
|
||||
<h3>April 19 Timeline</h3>
|
||||
<ul>
|
||||
<li>4:00 PM: Head to Maruyama Park</li>
|
||||
<li>5:30 PM: Arrive at spot</li>
|
||||
<li>7:00 PM: Dinner at Kikunoi Roan</li>
|
||||
</ul>
|
||||
<p><strong>Backup:</strong> Gion Shirakawa area</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button class="collapsible">💰 Budget Tracker</button>
|
||||
<div class="content">
|
||||
<div class="table-wrapper">
|
||||
<table>
|
||||
<tr><th>Item</th><th>Budget</th></tr>
|
||||
<tr><td>Hotels</td><td>$1500-2000</td></tr>
|
||||
<tr><td>Transport</td><td>$600-800</td></tr>
|
||||
<tr><td>Food</td><td>$800-1000</td></tr>
|
||||
<tr><td>Activities</td><td>$600-800</td></tr>
|
||||
<tr><td>Shopping</td><td>$400-500</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Add click handlers for collapsible sections
|
||||
var coll = document.getElementsByClassName("collapsible");
|
||||
for (var i = 0; i < coll.length; i++) {
|
||||
coll[i].addEventListener("click", function() {
|
||||
this.classList.toggle("active");
|
||||
var content = this.nextElementSibling;
|
||||
if (content.style.display === "block") {
|
||||
content.style.display = "none";
|
||||
} else {
|
||||
content.style.display = "block";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Function to show specific section
|
||||
function showSection(id) {
|
||||
document.getElementById(id).scrollIntoView({
|
||||
behavior: 'smooth'
|
||||
});
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
162
examples/japan-travel-plan/japan_travel_handbook_print.html
Normal file
162
examples/japan-travel-plan/japan_travel_handbook_print.html
Normal file
@ -0,0 +1,162 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Japan Travel Handbook (Print Version) - April 15-23, 2024</title>
|
||||
<style>
|
||||
@media print {
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: 11pt;
|
||||
line-height: 1.4;
|
||||
margin: 0.5in;
|
||||
}
|
||||
h1 { font-size: 16pt; }
|
||||
h2 { font-size: 14pt; }
|
||||
h3 { font-size: 12pt; }
|
||||
|
||||
.section {
|
||||
margin: 10px 0;
|
||||
padding: 5px;
|
||||
border: 1px solid #ccc;
|
||||
page-break-inside: avoid;
|
||||
}
|
||||
.no-break {
|
||||
page-break-inside: avoid;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 10px 0;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #000;
|
||||
padding: 4px;
|
||||
font-size: 10pt;
|
||||
}
|
||||
ul, ol {
|
||||
margin: 5px 0;
|
||||
padding-left: 20px;
|
||||
}
|
||||
li {
|
||||
margin: 3px 0;
|
||||
}
|
||||
.page-break {
|
||||
page-break-before: always;
|
||||
}
|
||||
}
|
||||
/* Screen styles */
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.4;
|
||||
margin: 20px;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.section {
|
||||
margin: 15px 0;
|
||||
padding: 15px;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 10px 0;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #000;
|
||||
padding: 8px;
|
||||
}
|
||||
@media screen {
|
||||
.page-break {
|
||||
margin: 30px 0;
|
||||
border-top: 2px dashed #ccc;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Japan Travel Handbook (Print Version)</h1>
|
||||
<p><strong>Trip Dates:</strong> April 15-23, 2024</p>
|
||||
|
||||
<div class="section">
|
||||
<h2>Emergency Contacts & Important Information</h2>
|
||||
<ul>
|
||||
<li>Emergency in Japan: 119 (Ambulance/Fire) / 110 (Police)</li>
|
||||
<li>US Embassy Tokyo: +81-3-3224-5000</li>
|
||||
<li>Tourist Information Hotline: 03-3201-3331</li>
|
||||
<li>Your Travel Insurance: [Write number here]</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>Daily Itinerary Summary</h2>
|
||||
<table>
|
||||
<tr><th>Date</th><th>Location</th><th>Key Activities</th></tr>
|
||||
<tr><td>Apr 15</td><td>Tokyo</td><td>Arrival, Shinjuku area exploration</td></tr>
|
||||
<tr><td>Apr 16</td><td>Tokyo</td><td>Meiji Shrine, Harajuku, Senso-ji, Skytree</td></tr>
|
||||
<tr><td>Apr 17</td><td>Tokyo</td><td>Tea Ceremony, Budokan, Yanaka Ginza</td></tr>
|
||||
<tr><td>Apr 18</td><td>Kyoto</td><td>Travel to Kyoto, Kinkaku-ji, Gion</td></tr>
|
||||
<tr><td>Apr 19</td><td>Kyoto</td><td>Fushimi Inari, Arashiyama, Evening Proposal</td></tr>
|
||||
<tr><td>Apr 20</td><td>Nara/Kyoto</td><td>Nara Park day trip, deer feeding</td></tr>
|
||||
<tr><td>Apr 21</td><td>Tokyo</td><td>Return to Tokyo, bay cruise</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="page-break"></div>
|
||||
|
||||
<div class="section">
|
||||
<h2>Essential Japanese Phrases</h2>
|
||||
<table>
|
||||
<tr><th>English</th><th>Japanese</th><th>When to Use</th></tr>
|
||||
<tr><td>Arigatou gozaimasu</td><td>ありがとうございます</td><td>Thank you (formal)</td></tr>
|
||||
<tr><td>Sumimasen</td><td>すみません</td><td>Excuse me/Sorry</td></tr>
|
||||
<tr><td>Onegaishimasu</td><td>お願いします</td><td>Please</td></tr>
|
||||
<tr><td>Toire wa doko desu ka?</td><td>トイレはどこですか?</td><td>Where is the bathroom?</td></tr>
|
||||
<tr><td>Eigo ga hanasemasu ka?</td><td>英語が話せますか?</td><td>Do you speak English?</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>Transportation Notes</h2>
|
||||
<ul>
|
||||
<li>JR Pass: Activate on April 15</li>
|
||||
<li>Tokyo-Kyoto Shinkansen: ~2h15m</li>
|
||||
<li>Kyoto-Nara Local Train: ~45m</li>
|
||||
<li>Last trains: Usually around midnight</li>
|
||||
<li>Keep ¥3000 for unexpected taxi rides</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="page-break"></div>
|
||||
|
||||
<div class="section no-break">
|
||||
<h2>Proposal Day Timeline (April 19)</h2>
|
||||
<table>
|
||||
<tr><th>Time</th><th>Activity</th><th>Notes</th></tr>
|
||||
<tr><td>4:00 PM</td><td>Head to Maruyama Park</td><td>Check weather first</td></tr>
|
||||
<tr><td>4:30 PM</td><td>Tea house visit</td><td>Light refreshments</td></tr>
|
||||
<tr><td>5:15 PM</td><td>Park walk begins</td><td>Head to weeping cherry tree</td></tr>
|
||||
<tr><td>5:30 PM</td><td>Arrive at spot</td><td>Find quiet area</td></tr>
|
||||
<tr><td>7:00 PM</td><td>Dinner reservation</td><td>Kikunoi Roan</td></tr>
|
||||
</table>
|
||||
<p><strong>Backup Location:</strong> Gion Shirakawa area (in case of rain)</p>
|
||||
</div>
|
||||
|
||||
<div class="section">
|
||||
<h2>Quick Reference Budget</h2>
|
||||
<table>
|
||||
<tr><th>Item</th><th>Budget (USD)</th><th>Notes</th></tr>
|
||||
<tr><td>Hotels</td><td>1500-2000</td><td>Pre-booked</td></tr>
|
||||
<tr><td>Transport</td><td>600-800</td><td>Including JR Pass</td></tr>
|
||||
<tr><td>Food</td><td>800-1000</td><td>~$60/person/day</td></tr>
|
||||
<tr><td>Activities</td><td>600-800</td><td>Including tea ceremony</td></tr>
|
||||
<tr><td>Shopping</td><td>400-500</td><td>Souvenirs/gifts</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
BIN
examples/pictures/japan-travel-plan-1.png
Normal file
BIN
examples/pictures/japan-travel-plan-1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 164 KiB |
BIN
examples/pictures/japan-travel-plan-2.png
Normal file
BIN
examples/pictures/japan-travel-plan-2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 36 KiB |
16
examples/readme.md
Normal file
16
examples/readme.md
Normal file
@ -0,0 +1,16 @@
|
||||
# Examples
|
||||
|
||||
We put some examples in the `examples` directory. All the examples use the same prompt
|
||||
as [Manus](https://manus.im/?utm_source=ai-bot.cn).
|
||||
|
||||
The model we use is `claude3.5`.
|
||||
|
||||
## Japan Travel Plan
|
||||
**Prompt**:
|
||||
```
|
||||
I need a 7-day Japan itinerary for April 15-23 from Seattle, with a $2500-5000 budget for my fiancée and me. We love historical sites, hidden gems, and Japanese culture (kendo, tea ceremonies, Zen meditation). We want to see Nara's deer and explore cities on foot. I plan to propose during this trip and need a special location recommendation. Please provide a detailed itinerary and a simple HTML travel handbook with maps, attraction descriptions, essential Japanese phrases, and travel tips we can reference throughout our journey.
|
||||
```
|
||||
**preview**:
|
||||

|
||||
|
||||

|
22
main.py
22
main.py
@ -6,17 +6,17 @@ from app.logger import logger
|
||||
|
||||
async def main():
|
||||
agent = Manus()
|
||||
while True:
|
||||
try:
|
||||
prompt = input("Enter your prompt (or 'exit' to quit): ")
|
||||
if prompt.lower() == "exit":
|
||||
logger.info("Goodbye!")
|
||||
break
|
||||
logger.warning("Processing your request...")
|
||||
await agent.run(prompt)
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("Goodbye!")
|
||||
break
|
||||
try:
|
||||
prompt = input("Enter your prompt: ")
|
||||
if not prompt.strip():
|
||||
logger.warning("Empty prompt provided.")
|
||||
return
|
||||
|
||||
logger.warning("Processing your request...")
|
||||
await agent.run(prompt)
|
||||
logger.info("Request processing completed.")
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("Operation interrupted.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -1,10 +1,12 @@
|
||||
pydantic~=2.10.4
|
||||
openai~=1.58.1
|
||||
pydantic~=2.10.6
|
||||
openai~=1.66.3
|
||||
tenacity~=9.0.0
|
||||
pyyaml~=6.0.2
|
||||
loguru~=0.7.3
|
||||
numpy
|
||||
datasets~=3.2.0
|
||||
fastapi~=0.115.11
|
||||
tiktoken~=0.9.0
|
||||
|
||||
html2text~=2024.2.26
|
||||
gymnasium~=1.0.0
|
||||
@ -14,11 +16,13 @@ uvicorn~=0.34.0
|
||||
unidiff~=0.7.5
|
||||
browser-use~=0.1.40
|
||||
googlesearch-python~=1.3.0
|
||||
baidusearch~=1.0.3
|
||||
duckduckgo_search~=7.5.1
|
||||
|
||||
aiofiles~=24.1.0
|
||||
pydantic_core~=2.27.2
|
||||
colorama~=0.4.6
|
||||
playwright~=1.49.1
|
||||
playwright~=1.50.0
|
||||
|
||||
docker~=7.1.0
|
||||
pytest~=8.3.5
|
||||
|
51
run_flow.py
51
run_flow.py
@ -1,32 +1,49 @@
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
from app.agent.manus import Manus
|
||||
from app.flow.base import FlowType
|
||||
from app.flow.flow_factory import FlowFactory
|
||||
from app.logger import logger
|
||||
|
||||
|
||||
async def run_flow():
|
||||
agent = Manus()
|
||||
agents = {
|
||||
"manus": Manus(),
|
||||
}
|
||||
|
||||
try:
|
||||
prompt = input("Enter your prompt: ")
|
||||
|
||||
if prompt.strip().isspace() or not prompt:
|
||||
logger.warning("Empty prompt provided.")
|
||||
return
|
||||
|
||||
flow = FlowFactory.create_flow(
|
||||
flow_type=FlowType.PLANNING,
|
||||
agents=agents,
|
||||
)
|
||||
logger.warning("Processing your request...")
|
||||
|
||||
while True:
|
||||
try:
|
||||
prompt = input("Enter your prompt (or 'exit' to quit): ")
|
||||
if prompt.lower() == "exit":
|
||||
print("Goodbye!")
|
||||
break
|
||||
|
||||
flow = FlowFactory.create_flow(
|
||||
flow_type=FlowType.PLANNING,
|
||||
agents=agent,
|
||||
start_time = time.time()
|
||||
result = await asyncio.wait_for(
|
||||
flow.execute(prompt),
|
||||
timeout=3600, # 60 minute timeout for the entire execution
|
||||
)
|
||||
elapsed_time = time.time() - start_time
|
||||
logger.info(f"Request processed in {elapsed_time:.2f} seconds")
|
||||
logger.info(result)
|
||||
except asyncio.TimeoutError:
|
||||
logger.error("Request processing timed out after 1 hour")
|
||||
logger.info(
|
||||
"Operation terminated due to timeout. Please try a simpler request."
|
||||
)
|
||||
|
||||
print("Processing your request...")
|
||||
result = await flow.execute(prompt)
|
||||
print(result)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Goodbye!")
|
||||
break
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Operation cancelled by user.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {str(e)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
7
setup.py
7
setup.py
@ -1,4 +1,5 @@
|
||||
from setuptools import setup, find_packages
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
|
||||
with open("README.md", "r", encoding="utf-8") as fh:
|
||||
long_description = fh.read()
|
||||
@ -15,7 +16,7 @@ setup(
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
"pydantic~=2.10.4",
|
||||
"openai~=1.58.1",
|
||||
"openai>=1.58.1,<1.67.0",
|
||||
"tenacity~=9.0.0",
|
||||
"pyyaml~=6.0.2",
|
||||
"loguru~=0.7.3",
|
||||
@ -30,7 +31,7 @@ setup(
|
||||
"browser-use~=0.1.40",
|
||||
"googlesearch-python~=1.3.0",
|
||||
"aiofiles~=24.1.0",
|
||||
"pydantic_core~=2.27.2",
|
||||
"pydantic_core>=2.27.2,<2.28.0",
|
||||
"colorama~=0.4.6",
|
||||
],
|
||||
classifiers=[
|
||||
|
@ -5,7 +5,7 @@ from typing import AsyncGenerator
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from app.config import SandboxConfig
|
||||
from app.config import SandboxSettings
|
||||
from app.sandbox.client import LocalSandboxClient, create_sandbox_client
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ def temp_dir() -> Path:
|
||||
@pytest.mark.asyncio
|
||||
async def test_sandbox_creation(local_client: LocalSandboxClient):
|
||||
"""Tests sandbox creation with specific configuration."""
|
||||
config = SandboxConfig(
|
||||
config = SandboxSettings(
|
||||
image="python:3.10-slim",
|
||||
work_dir="/workspace",
|
||||
memory_limit="512m",
|
||||
|
@ -1,13 +1,13 @@
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from app.sandbox.core.sandbox import DockerSandbox, SandboxConfig
|
||||
from app.sandbox.core.sandbox import DockerSandbox, SandboxSettings
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def sandbox_config():
|
||||
"""Creates sandbox configuration for testing."""
|
||||
return SandboxConfig(
|
||||
return SandboxSettings(
|
||||
image="python:3.10-slim",
|
||||
work_dir="/workspace",
|
||||
memory_limit="1g",
|
||||
@ -141,7 +141,7 @@ async def test_sandbox_cleanup(sandbox_config):
|
||||
async def test_sandbox_error_handling():
|
||||
"""Tests error handling with invalid configuration."""
|
||||
# Test invalid configuration
|
||||
invalid_config = SandboxConfig(image="nonexistent:latest", work_dir="/invalid")
|
||||
invalid_config = SandboxSettings(image="nonexistent:latest", work_dir="/invalid")
|
||||
|
||||
sandbox = DockerSandbox(invalid_config)
|
||||
with pytest.raises(Exception):
|
||||
|
1
workspace/example.txt
Normal file
1
workspace/example.txt
Normal file
@ -0,0 +1 @@
|
||||
This is a sample file. Files generated by OpenManus are stored in the current folder by default.
|
Loading…
x
Reference in New Issue
Block a user