From aa512fac6e6f064315b95a87a94891f52839d6a8 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Tue, 18 Mar 2025 11:46:35 +0800 Subject: [PATCH 01/19] refactor(app): Complete exception logging in LLM.ask --- app/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/llm.py b/app/llm.py index 18a13af..f78a7f7 100644 --- a/app/llm.py +++ b/app/llm.py @@ -309,8 +309,8 @@ class LLM: elif isinstance(oe, APIError): logger.error(f"API error: {oe}") raise - except Exception as e: - logger.error(f"Unexpected error in ask: {e}") + except Exception: + logger.exception(f"Unexpected error in ask") raise @retry( From ca612699ec0d2b1942c27a0ccd2c1adae39a1a48 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Tue, 18 Mar 2025 11:53:47 +0800 Subject: [PATCH 02/19] refactor(app): explicitly specify LLM request parameters to allow typing --- app/llm.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/app/llm.py b/app/llm.py index f78a7f7..b527058 100644 --- a/app/llm.py +++ b/app/llm.py @@ -263,23 +263,23 @@ class LLM: if not stream: # Non-streaming request - params["stream"] = False - - response = await self.client.chat.completions.create(**params) + response = await self.client.chat.completions.create( + **params, stream=False + ) if not response.choices or not response.choices[0].message.content: raise ValueError("Empty or invalid response from LLM") # Update token counts - self.update_token_count(response.usage.prompt_tokens) + if response.usage: + self.update_token_count(response.usage.prompt_tokens) return response.choices[0].message.content # Streaming request, For streaming, update estimated token count before making the request self.update_token_count(input_tokens) - params["stream"] = True - response = await self.client.chat.completions.create(**params) + response = await self.client.chat.completions.create(**params, stream=True) collected_messages = [] async for chunk in response: @@ -292,6 +292,8 @@ class LLM: if not full_response: raise ValueError("Empty response from streaming LLM") + # TODO Update token counts + return full_response except TokenLimitExceeded: From cf7d6c120775fce34366661a611bf73352e9f8fc Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Tue, 18 Mar 2025 13:36:15 +0800 Subject: [PATCH 03/19] chore(app): Update error logging to use exception details --- app/llm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/llm.py b/app/llm.py index b527058..6a147c4 100644 --- a/app/llm.py +++ b/app/llm.py @@ -299,11 +299,11 @@ class LLM: except TokenLimitExceeded: # Re-raise token limit errors without logging raise - except ValueError as ve: - logger.error(f"Validation error: {ve}") + except ValueError: + logger.exception(f"Validation error") raise except OpenAIError as oe: - logger.error(f"OpenAI API error: {oe}") + logger.exception(f"OpenAI API error") if isinstance(oe, AuthenticationError): logger.error("Authentication failed. Check API key.") elif isinstance(oe, RateLimitError): From b95244a60b461748a59894f06df7f62d84e2730f Mon Sep 17 00:00:00 2001 From: via Date: Tue, 18 Mar 2025 15:40:25 +0800 Subject: [PATCH 04/19] add bing search --- app/tool/search/__init__.py | 2 + app/tool/search/bing_search.py | 153 +++++++++++++++++++++++++++++++++ app/tool/web_search.py | 2 + 3 files changed, 157 insertions(+) create mode 100644 app/tool/search/bing_search.py diff --git a/app/tool/search/__init__.py b/app/tool/search/__init__.py index 4f486ac..8b6cfdb 100644 --- a/app/tool/search/__init__.py +++ b/app/tool/search/__init__.py @@ -2,6 +2,7 @@ from app.tool.search.baidu_search import BaiduSearchEngine from app.tool.search.base import WebSearchEngine from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine from app.tool.search.google_search import GoogleSearchEngine +from app.tool.search.bing_search import BingSearchEngine __all__ = [ @@ -9,4 +10,5 @@ __all__ = [ "BaiduSearchEngine", "DuckDuckGoSearchEngine", "GoogleSearchEngine", + "BingSearchEngine", ] diff --git a/app/tool/search/bing_search.py b/app/tool/search/bing_search.py new file mode 100644 index 0000000..53e94c9 --- /dev/null +++ b/app/tool/search/bing_search.py @@ -0,0 +1,153 @@ +import asyncio +from typing import List +import requests +from app.logger import logger +from bs4 import BeautifulSoup +from app.tool.search.base import WebSearchEngine + +ABSTRACT_MAX_LENGTH = 300 + +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net', + 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3', + 'Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7', + 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866' +] + +HEADERS = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": USER_AGENTS[0], + "Referer": "https://www.bing.com/", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "zh-CN,zh;q=0.9" +} + +BING_HOST_URL = "https://www.bing.com" +BING_SEARCH_URL = "https://www.bing.com/search?q=" + + +class BingSearchEngine(WebSearchEngine): + session: requests.Session = None + + def __init__(self, **data): + """Initialize the BingSearch tool with a requests session.""" + super().__init__(**data) + self.session = requests.Session() + self.session.headers.update(HEADERS) + + def _search_sync(self, query: str, num_results: int = 10) -> List[str]: + """ + Synchronous Bing search implementation to retrieve a list of URLs matching a query. + + Args: + query (str): The search query to submit to Bing. Must not be empty. + num_results (int, optional): The maximum number of URLs to return. Defaults to 10. + + Returns: + List[str]: A list of URLs from the search results, capped at `num_results`. + Returns an empty list if the query is empty or no results are found. + + Notes: + - Pagination is handled by incrementing the `first` parameter and following `next_url` links. + - If fewer results than `num_results` are available, all found URLs are returned. + """ + if not query: + return [] + + list_result = [] + first = 1 + next_url = BING_SEARCH_URL + query + + while len(list_result) < num_results: + data, next_url = self._parse_html(next_url, rank_start=len(list_result), first=first) + if data: + list_result.extend([item["url"] for item in data]) + if not next_url: + break + first += 10 + + return list_result[:num_results] + + def _parse_html(self, url: str, rank_start: int = 0, first: int = 1) -> tuple: + """ + Parse Bing search result HTML synchronously to extract search results and the next page URL. + + Args: + url (str): The URL of the Bing search results page to parse. + rank_start (int, optional): The starting rank for numbering the search results. Defaults to 0. + first (int, optional): Unused parameter (possibly legacy). Defaults to 1. + Returns: + tuple: A tuple containing: + - list: A list of dictionaries with keys 'title', 'abstract', 'url', and 'rank' for each result. + - str or None: The URL of the next results page, or None if there is no next page. + Example: + This function is called by `execute` in the following way: + ```python + results, next_url = self._parse_html(url, rank_start=0) + ``` + """ + try: + res = self.session.get(url=url) + res.encoding = "utf-8" + root = BeautifulSoup(res.text, "lxml") + + list_data = [] + ol_results = root.find("ol", id="b_results") + if not ol_results: + return [], None + + for li in ol_results.find_all("li", class_="b_algo"): + title = '' + url = '' + abstract = '' + try: + h2 = li.find("h2") + if h2: + title = h2.text.strip() + url = h2.a['href'].strip() + + p = li.find("p") + if p: + abstract = p.text.strip() + + if ABSTRACT_MAX_LENGTH and len(abstract) > ABSTRACT_MAX_LENGTH: + abstract = abstract[:ABSTRACT_MAX_LENGTH] + + rank_start += 1 + list_data.append({"title": title, "abstract": abstract, "url": url, "rank": rank_start}) + except Exception: + continue + + next_btn = root.find("a", title="Next page") + if not next_btn: + return list_data, None + + next_url = BING_HOST_URL + next_btn["href"] + return list_data, next_url + except Exception as e: + logger.warning(f"Error parsing HTML: {e}") + return [], None + + async def execute(self, query: str, num_results: int = 10) -> List[str]: + """ + Execute a Bing search and return a list of URLs asynchronously. + + Args: + query (str): The search query to submit to Bing. + num_results (int, optional): The number of search results to return. Default is 10. + + Returns: + List[str]: A list of URLs matching the search query. + """ + loop = asyncio.get_event_loop() + links = await loop.run_in_executor( + None, lambda: self._search_sync(query, num_results=num_results) + ) + return links diff --git a/app/tool/web_search.py b/app/tool/web_search.py index 7b1018b..99d8916 100644 --- a/app/tool/web_search.py +++ b/app/tool/web_search.py @@ -10,6 +10,7 @@ from app.tool.search import ( DuckDuckGoSearchEngine, GoogleSearchEngine, WebSearchEngine, + BingSearchEngine ) @@ -37,6 +38,7 @@ class WebSearch(BaseTool): "google": GoogleSearchEngine(), "baidu": BaiduSearchEngine(), "duckduckgo": DuckDuckGoSearchEngine(), + "bing": BingSearchEngine() } async def execute(self, query: str, num_results: int = 10) -> List[str]: From b62bf92e19849915cb9ca0d878bd27e3e00281ad Mon Sep 17 00:00:00 2001 From: via Date: Tue, 18 Mar 2025 16:57:29 +0800 Subject: [PATCH 05/19] modified bing search --- app/tool/search/bing_search.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/app/tool/search/bing_search.py b/app/tool/search/bing_search.py index 53e94c9..c5eaa86 100644 --- a/app/tool/search/bing_search.py +++ b/app/tool/search/bing_search.py @@ -1,4 +1,3 @@ -import asyncio from typing import List import requests from app.logger import logger @@ -45,13 +44,13 @@ class BingSearchEngine(WebSearchEngine): def _search_sync(self, query: str, num_results: int = 10) -> List[str]: """ Synchronous Bing search implementation to retrieve a list of URLs matching a query. - + Args: query (str): The search query to submit to Bing. Must not be empty. num_results (int, optional): The maximum number of URLs to return. Defaults to 10. Returns: - List[str]: A list of URLs from the search results, capped at `num_results`. + List[str]: A list of URLs from the search results, capped at `num_results`. Returns an empty list if the query is empty or no results are found. Notes: @@ -87,11 +86,6 @@ class BingSearchEngine(WebSearchEngine): tuple: A tuple containing: - list: A list of dictionaries with keys 'title', 'abstract', 'url', and 'rank' for each result. - str or None: The URL of the next results page, or None if there is no next page. - Example: - This function is called by `execute` in the following way: - ```python - results, next_url = self._parse_html(url, rank_start=0) - ``` """ try: res = self.session.get(url=url) @@ -135,19 +129,6 @@ class BingSearchEngine(WebSearchEngine): logger.warning(f"Error parsing HTML: {e}") return [], None - async def execute(self, query: str, num_results: int = 10) -> List[str]: - """ - Execute a Bing search and return a list of URLs asynchronously. - - Args: - query (str): The search query to submit to Bing. - num_results (int, optional): The number of search results to return. Default is 10. - - Returns: - List[str]: A list of URLs matching the search query. - """ - loop = asyncio.get_event_loop() - links = await loop.run_in_executor( - None, lambda: self._search_sync(query, num_results=num_results) - ) - return links + def perform_search(self, query, num_results=10, *args, **kwargs): + """Bing search engine.""" + return self._search_sync(query, num_results=num_results) \ No newline at end of file From 0654d36e4050b09fad031c5c7c53e98faf71adf7 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Tue, 18 Mar 2025 17:23:09 +0800 Subject: [PATCH 06/19] ci: update Markdown issue templates to forms --- .../ISSUE_TEMPLATE/request_new_features.md | 14 ------ .../ISSUE_TEMPLATE/request_new_features.yaml | 21 +++++++++ .github/ISSUE_TEMPLATE/show_me_the_bug.md | 25 ----------- .github/ISSUE_TEMPLATE/show_me_the_bug.yaml | 44 +++++++++++++++++++ 4 files changed, 65 insertions(+), 39 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/request_new_features.md create mode 100644 .github/ISSUE_TEMPLATE/request_new_features.yaml delete mode 100644 .github/ISSUE_TEMPLATE/show_me_the_bug.md create mode 100644 .github/ISSUE_TEMPLATE/show_me_the_bug.yaml diff --git a/.github/ISSUE_TEMPLATE/request_new_features.md b/.github/ISSUE_TEMPLATE/request_new_features.md deleted file mode 100644 index c191adb..0000000 --- a/.github/ISSUE_TEMPLATE/request_new_features.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: "🤔 Request new features" -about: Suggest ideas or features you’d like to see implemented in OpenManus. -title: '' -labels: kind/features -assignees: '' ---- - -**Feature description** - - -**Your Feature** - - diff --git a/.github/ISSUE_TEMPLATE/request_new_features.yaml b/.github/ISSUE_TEMPLATE/request_new_features.yaml new file mode 100644 index 0000000..749ab7f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/request_new_features.yaml @@ -0,0 +1,21 @@ +name: "🤔 Request new features" +description: Suggest ideas or features you’d like to see implemented in OpenManus. +labels: enhancement +body: + - type: textarea + id: feature-description + attributes: + label: Feature description + description: | + Provide a clear and concise description of the proposed feature + validations: + required: true + - type: textarea + id: your-feature + attributes: + label: Your Feature + description: | + Explain your idea or implementation process, if any. Optionally, include a Pull Request URL. + Ensure accompanying docs/tests/examples are provided for review. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/show_me_the_bug.md b/.github/ISSUE_TEMPLATE/show_me_the_bug.md deleted file mode 100644 index a3d8700..0000000 --- a/.github/ISSUE_TEMPLATE/show_me_the_bug.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -name: "🪲 Show me the Bug" -about: Report a bug encountered while using OpenManus and seek assistance. -title: '' -labels: kind/bug -assignees: '' ---- - -**Bug description** - - -**Bug solved method** - - - -**Environment information** - - -- System version: -- Python version: -- OpenManus version or branch: -- Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`): - -**Screenshots or logs** - diff --git a/.github/ISSUE_TEMPLATE/show_me_the_bug.yaml b/.github/ISSUE_TEMPLATE/show_me_the_bug.yaml new file mode 100644 index 0000000..de9298e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/show_me_the_bug.yaml @@ -0,0 +1,44 @@ +name: "🪲 Show me the Bug" +description: Report a bug encountered while using OpenManus and seek assistance. +labels: bug +body: + - type: textarea + id: bug-description + attributes: + label: Bug Description + description: | + Clearly describe the bug you encountered + validations: + required: true + - type: textarea + id: solve-method + attributes: + label: Bug solved method + description: | + If resolved, explain the solution. Optionally, include a Pull Request URL. + If unresolved, provide additional details to aid investigation + validations: + required: true + - type: textarea + id: environment-information + attributes: + label: Environment information + description: | + System: e.g., Ubuntu 22.04 + Python: e.g., 3.12 + OpenManus version: e.g., 0.1.0 + value: | + - System version: + - Python version: + - OpenManus version or branch: + - Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`): + validations: + required: true + - type: textarea + id: extra-information + attributes: + label: Extra information + description: | + For example, attach screenshots or logs to help diagnose the issue + validations: + required: false From 2fad2904d7e8f922464e1df6d25f119bc7b778b1 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Tue, 18 Mar 2025 21:10:43 +0800 Subject: [PATCH 07/19] ci(pr-autodiff): add Chinese explicit shot --- .github/workflows/pr-autodiff.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/pr-autodiff.yaml b/.github/workflows/pr-autodiff.yaml index ed218dc..85c79d0 100644 --- a/.github/workflows/pr-autodiff.yaml +++ b/.github/workflows/pr-autodiff.yaml @@ -86,6 +86,17 @@ jobs: ### Spelling/Offensive Content Check - No spelling mistakes or offensive content found in the code or comments. + + ## 中文(简体) + - 新增了 `ABC` 类 + - `foo` 模块中的 `f()` 行为已修复 + + ### 评论高亮 + - `config.toml` 需要正确配置才能确保新功能正常运行。 + + ### 内容检查 + - 没有发现代码或注释中的拼写错误或不当措辞。 + 3. Highlight non-English comments 4. Check for spelling/offensive content''' From 47adb33bd9352450cb5c070f62c26832d8862207 Mon Sep 17 00:00:00 2001 From: via Date: Wed, 19 Mar 2025 09:13:47 +0800 Subject: [PATCH 08/19] Apply black and isort formatting --- app/tool/search/__init__.py | 3 +-- app/tool/search/bing_search.py | 49 +++++++++++++++++++++------------- app/tool/web_search.py | 8 +++--- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/app/tool/search/__init__.py b/app/tool/search/__init__.py index 8b6cfdb..70ea3d7 100644 --- a/app/tool/search/__init__.py +++ b/app/tool/search/__init__.py @@ -1,9 +1,8 @@ from app.tool.search.baidu_search import BaiduSearchEngine from app.tool.search.base import WebSearchEngine +from app.tool.search.bing_search import BingSearchEngine from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine from app.tool.search.google_search import GoogleSearchEngine -from app.tool.search.bing_search import BingSearchEngine - __all__ = [ "WebSearchEngine", diff --git a/app/tool/search/bing_search.py b/app/tool/search/bing_search.py index c5eaa86..701841b 100644 --- a/app/tool/search/bing_search.py +++ b/app/tool/search/bing_search.py @@ -1,22 +1,24 @@ from typing import List + import requests -from app.logger import logger from bs4 import BeautifulSoup + +from app.logger import logger from app.tool.search.base import WebSearchEngine ABSTRACT_MAX_LENGTH = 300 USER_AGENTS = [ - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net', - 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3', - 'Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7', - 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866' + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36", + "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net", + "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3", + "Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7", + "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201", + "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866", ] HEADERS = { @@ -25,7 +27,7 @@ HEADERS = { "User-Agent": USER_AGENTS[0], "Referer": "https://www.bing.com/", "Accept-Encoding": "gzip, deflate", - "Accept-Language": "zh-CN,zh;q=0.9" + "Accept-Language": "zh-CN,zh;q=0.9", } BING_HOST_URL = "https://www.bing.com" @@ -65,7 +67,9 @@ class BingSearchEngine(WebSearchEngine): next_url = BING_SEARCH_URL + query while len(list_result) < num_results: - data, next_url = self._parse_html(next_url, rank_start=len(list_result), first=first) + data, next_url = self._parse_html( + next_url, rank_start=len(list_result), first=first + ) if data: list_result.extend([item["url"] for item in data]) if not next_url: @@ -98,14 +102,14 @@ class BingSearchEngine(WebSearchEngine): return [], None for li in ol_results.find_all("li", class_="b_algo"): - title = '' - url = '' - abstract = '' + title = "" + url = "" + abstract = "" try: h2 = li.find("h2") if h2: title = h2.text.strip() - url = h2.a['href'].strip() + url = h2.a["href"].strip() p = li.find("p") if p: @@ -115,7 +119,14 @@ class BingSearchEngine(WebSearchEngine): abstract = abstract[:ABSTRACT_MAX_LENGTH] rank_start += 1 - list_data.append({"title": title, "abstract": abstract, "url": url, "rank": rank_start}) + list_data.append( + { + "title": title, + "abstract": abstract, + "url": url, + "rank": rank_start, + } + ) except Exception: continue @@ -131,4 +142,4 @@ class BingSearchEngine(WebSearchEngine): def perform_search(self, query, num_results=10, *args, **kwargs): """Bing search engine.""" - return self._search_sync(query, num_results=num_results) \ No newline at end of file + return self._search_sync(query, num_results=num_results) diff --git a/app/tool/web_search.py b/app/tool/web_search.py index 99d8916..3647883 100644 --- a/app/tool/web_search.py +++ b/app/tool/web_search.py @@ -6,11 +6,11 @@ from tenacity import retry, stop_after_attempt, wait_exponential from app.config import config from app.tool.base import BaseTool from app.tool.search import ( - BaiduSearchEngine, + BaiduSearchEngine, + BingSearchEngine, DuckDuckGoSearchEngine, GoogleSearchEngine, - WebSearchEngine, - BingSearchEngine + WebSearchEngine ) @@ -38,7 +38,7 @@ class WebSearch(BaseTool): "google": GoogleSearchEngine(), "baidu": BaiduSearchEngine(), "duckduckgo": DuckDuckGoSearchEngine(), - "bing": BingSearchEngine() + "bing": BingSearchEngine(), } async def execute(self, query: str, num_results: int = 10) -> List[str]: From b9fdade6e42afacdc7622416d8d1856c43336e2d Mon Sep 17 00:00:00 2001 From: via Date: Wed, 19 Mar 2025 09:15:51 +0800 Subject: [PATCH 09/19] Apply black and isort formatting --- app/tool/web_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tool/web_search.py b/app/tool/web_search.py index 3647883..8b80e46 100644 --- a/app/tool/web_search.py +++ b/app/tool/web_search.py @@ -6,7 +6,7 @@ from tenacity import retry, stop_after_attempt, wait_exponential from app.config import config from app.tool.base import BaseTool from app.tool.search import ( - BaiduSearchEngine, + BaiduSearchEngine, BingSearchEngine, DuckDuckGoSearchEngine, GoogleSearchEngine, From 1279d77ccae1dc1c38c38ec7f9d01d84230c6bf8 Mon Sep 17 00:00:00 2001 From: minbang930 Date: Wed, 19 Mar 2025 10:52:58 +0900 Subject: [PATCH 10/19] update api_type field description to include Ollama Clarify the description of the api_type field in LLMSettings to accurately reflect all supported types including Azure, OpenAI, and Ollama. This makes the documentation consistent with the example configuration. --- app/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/config.py b/app/config.py index 0be771b..011ceb1 100644 --- a/app/config.py +++ b/app/config.py @@ -25,7 +25,7 @@ class LLMSettings(BaseModel): description="Maximum input tokens to use across all requests (None for unlimited)", ) temperature: float = Field(1.0, description="Sampling temperature") - api_type: str = Field(..., description="AzureOpenai or Openai") + api_type: str = Field(..., description="Azure, Openai, or Ollama") api_version: str = Field(..., description="Azure Openai version if AzureOpenai") From 7b38dd7fbc3a6197e88e0fa98a16efe30125bf36 Mon Sep 17 00:00:00 2001 From: liangxinbing <1580466765@qq.com> Date: Wed, 19 Mar 2025 13:18:45 +0800 Subject: [PATCH 11/19] update format_messages --- app/llm.py | 145 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 88 insertions(+), 57 deletions(-) diff --git a/app/llm.py b/app/llm.py index 37de566..a67a86f 100644 --- a/app/llm.py +++ b/app/llm.py @@ -30,6 +30,14 @@ from app.schema import ( REASONING_MODELS = ["o1", "o3-mini"] +MULTIMODAL_MODELS = [ + "gpt-4-vision-preview", + "gpt-4o", + "gpt-4o-mini", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", +] class TokenCounter: @@ -259,12 +267,15 @@ class LLM: return "Token limit exceeded" @staticmethod - def format_messages(messages: List[Union[dict, Message]]) -> List[dict]: + def format_messages( + messages: List[Union[dict, Message]], supports_images: bool = False + ) -> List[dict]: """ Format messages for LLM by converting them to OpenAI message format. Args: messages: List of messages that can be either dict or Message objects + supports_images: Flag indicating if the target model supports image inputs Returns: List[dict]: List of formatted messages in OpenAI format @@ -288,54 +299,58 @@ class LLM: if isinstance(message, Message): message = message.to_dict() - if not isinstance(message, dict): + if isinstance(message, dict): + # If message is a dict, ensure it has required fields + if "role" not in message: + raise ValueError("Message dict must contain 'role' field") + + # Process base64 images if present and model supports images + if supports_images and message.get("base64_image"): + # Initialize or convert content to appropriate format + if not message.get("content"): + message["content"] = [] + elif isinstance(message["content"], str): + message["content"] = [ + {"type": "text", "text": message["content"]} + ] + elif isinstance(message["content"], list): + # Convert string items to proper text objects + message["content"] = [ + ( + {"type": "text", "text": item} + if isinstance(item, str) + else item + ) + for item in message["content"] + ] + + # Add the image to content + message["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{message['base64_image']}" + }, + } + ) + + # Remove the base64_image field + del message["base64_image"] + # If model doesn't support images but message has base64_image, handle gracefully + elif not supports_images and message.get("base64_image"): + # Just remove the base64_image field and keep the text content + del message["base64_image"] + + if "content" in message or "tool_calls" in message: + formatted_messages.append(message) + # else: do not include the message + else: raise TypeError(f"Unsupported message type: {type(message)}") - # Validate required fields - if "role" not in message: - raise ValueError("Message dict must contain 'role' field") - - # Process base64 images if present - if message.get("base64_image"): - # Initialize or convert content to appropriate format - if not message.get("content"): - message["content"] = [] - elif isinstance(message["content"], str): - message["content"] = [{"type": "text", "text": message["content"]}] - elif isinstance(message["content"], list): - # Convert string items to proper text objects - message["content"] = [ - ( - {"type": "text", "text": item} - if isinstance(item, str) - else item - ) - for item in message["content"] - ] - - # Add the image to content - message["content"].append( - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{message['base64_image']}" - }, - } - ) - - # Remove the base64_image field - del message["base64_image"] - - # Only include messages with content or tool_calls - if "content" in message or "tool_calls" in message: - formatted_messages.append(message) - - # Validate all roles - invalid_roles = [ - msg for msg in formatted_messages if msg["role"] not in ROLE_VALUES - ] - if invalid_roles: - raise ValueError(f"Invalid role: {invalid_roles[0]['role']}") + # Validate all messages have required fields + for msg in formatted_messages: + if msg["role"] not in ROLE_VALUES: + raise ValueError(f"Invalid role: {msg['role']}") return formatted_messages @@ -372,12 +387,15 @@ class LLM: Exception: For unexpected errors """ try: - # Format system and user messages + # Check if the model supports images + supports_images = self.model in MULTIMODAL_MODELS + + # Format system and user messages with image support check if system_msgs: - system_msgs = self.format_messages(system_msgs) - messages = system_msgs + self.format_messages(messages) + system_msgs = self.format_messages(system_msgs, supports_images) + messages = system_msgs + self.format_messages(messages, supports_images) else: - messages = self.format_messages(messages) + messages = self.format_messages(messages, supports_images) # Calculate input token count input_tokens = self.count_message_tokens(messages) @@ -499,8 +517,15 @@ class LLM: Exception: For unexpected errors """ try: - # Format messages - formatted_messages = self.format_messages(messages) + # For ask_with_images, we always set supports_images to True because + # this method should only be called with models that support images + if self.model not in MULTIMODAL_MODELS: + raise ValueError( + f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}" + ) + + # Format messages with image support + formatted_messages = self.format_messages(messages, supports_images=True) # Ensure the last message is from the user to attach images if not formatted_messages or formatted_messages[-1]["role"] != "user": @@ -539,7 +564,10 @@ class LLM: # Add system messages if provided if system_msgs: - all_messages = self.format_messages(system_msgs) + formatted_messages + all_messages = ( + self.format_messages(system_msgs, supports_images=True) + + formatted_messages + ) else: all_messages = formatted_messages @@ -653,12 +681,15 @@ class LLM: if tool_choice not in TOOL_CHOICE_VALUES: raise ValueError(f"Invalid tool_choice: {tool_choice}") + # Check if the model supports images + supports_images = self.model in MULTIMODAL_MODELS + # Format messages if system_msgs: - system_msgs = self.format_messages(system_msgs) - messages = system_msgs + self.format_messages(messages) + system_msgs = self.format_messages(system_msgs, supports_images) + messages = system_msgs + self.format_messages(messages, supports_images) else: - messages = self.format_messages(messages) + messages = self.format_messages(messages, supports_images) # Calculate input token count input_tokens = self.count_message_tokens(messages) From 402355533c33c13f202a27e83984b53e4360633f Mon Sep 17 00:00:00 2001 From: liangxinbing <1580466765@qq.com> Date: Wed, 19 Mar 2025 13:24:49 +0800 Subject: [PATCH 12/19] format code --- app/tool/search/__init__.py | 1 + app/tool/search/bing_search.py | 1 + app/tool/web_search.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app/tool/search/__init__.py b/app/tool/search/__init__.py index 70ea3d7..fe127ae 100644 --- a/app/tool/search/__init__.py +++ b/app/tool/search/__init__.py @@ -4,6 +4,7 @@ from app.tool.search.bing_search import BingSearchEngine from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine from app.tool.search.google_search import GoogleSearchEngine + __all__ = [ "WebSearchEngine", "BaiduSearchEngine", diff --git a/app/tool/search/bing_search.py b/app/tool/search/bing_search.py index 701841b..46955b5 100644 --- a/app/tool/search/bing_search.py +++ b/app/tool/search/bing_search.py @@ -6,6 +6,7 @@ from bs4 import BeautifulSoup from app.logger import logger from app.tool.search.base import WebSearchEngine + ABSTRACT_MAX_LENGTH = 300 USER_AGENTS = [ diff --git a/app/tool/web_search.py b/app/tool/web_search.py index 8b80e46..cb13934 100644 --- a/app/tool/web_search.py +++ b/app/tool/web_search.py @@ -10,7 +10,7 @@ from app.tool.search import ( BingSearchEngine, DuckDuckGoSearchEngine, GoogleSearchEngine, - WebSearchEngine + WebSearchEngine, ) From b7dcbfecb3f38fb7aa87453acde557209f35d653 Mon Sep 17 00:00:00 2001 From: liangxinbing <1580466765@qq.com> Date: Wed, 19 Mar 2025 13:27:08 +0800 Subject: [PATCH 13/19] update extract_content action for BrowserUseTool --- app/tool/browser_use_tool.py | 62 ++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py index 7817aef..7fe8f16 100644 --- a/app/tool/browser_use_tool.py +++ b/app/tool/browser_use_tool.py @@ -418,17 +418,7 @@ class BrowserUseTool(BaseTool, Generic[Context]): # Create prompt for LLM prompt_text = """ -Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. - -Examples of extraction goals: -- Extract all company names -- Extract specific descriptions -- Extract all information about a topic -- Extract links with companies in structured format -- Extract all links - -If the goal is vague, summarize the page. Respond in JSON format. - +Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal} Page content: @@ -445,10 +435,54 @@ Page content: messages = [Message.user_message(formatted_prompt)] - # Use LLM to extract content based on the goal - response = await self.llm.ask(messages) + # Define extraction function for the tool + extraction_function = { + "type": "function", + "function": { + "name": "extract_content", + "description": "Extract specific information from a webpage based on a goal", + "parameters": { + "type": "object", + "properties": { + "extracted_content": { + "type": "object", + "description": "The content extracted from the page according to the goal", + } + }, + "required": ["extracted_content"], + }, + }, + } + + # Use LLM to extract content with required function calling + response = await self.llm.ask_tool( + messages, + tools=[extraction_function], + tool_choice="required", + ) + + # Extract content from function call response + if ( + response + and response.tool_calls + and len(response.tool_calls) > 0 + ): + # Get the first tool call arguments + tool_call = response.tool_calls[0] + # Parse the JSON arguments + try: + args = json.loads(tool_call.function.arguments) + extracted_content = args.get("extracted_content", {}) + # Format extracted content as JSON string + content_json = json.dumps( + extracted_content, indent=2, ensure_ascii=False + ) + msg = f"Extracted from page:\n{content_json}\n" + except Exception as e: + msg = f"Error parsing extraction result: {str(e)}\nRaw response: {tool_call.function.arguments}" + else: + msg = "No content was extracted from the page." - msg = f"Extracted from page:\n{response}\n" return ToolResult(output=msg) except Exception as e: # Provide a more helpful error message From d7b3f9a5c3e87f31ff61fe8baa9939b5038c1a7b Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 13:32:31 +0800 Subject: [PATCH 14/19] fix(pr-autodiff): make sure compare does correctly --- .github/workflows/pr-autodiff.yaml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pr-autodiff.yaml b/.github/workflows/pr-autodiff.yaml index 85c79d0..46c95c6 100644 --- a/.github/workflows/pr-autodiff.yaml +++ b/.github/workflows/pr-autodiff.yaml @@ -15,21 +15,20 @@ jobs: (github.event_name == 'pull_request') || (github.event_name == 'issue_comment' && contains(github.event.comment.body, '!pr-diff') && - (github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && + (github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && github.event.issue.pull_request) steps: - name: Get PR head SHA id: get-pr-sha run: | - if [ "${{ github.event_name }}" == "pull_request" ]; then - echo "pr_sha=${{ github.event.pull_request.head.sha }}" >> $GITHUB_OUTPUT - echo "Retrieved PR head SHA: ${{ github.event.pull_request.head.sha }}" - else - PR_URL="${{ github.event.issue.pull_request.url }}" - SHA=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL | jq -r '.head.sha') - echo "pr_sha=$SHA" >> $GITHUB_OUTPUT - echo "Retrieved PR head SHA from API: $SHA" - fi + PR_URL="${{ github.event.issue.pull_request.url || github.event.pull_request.url }}" + # https://api.github.com/repos/OpenManus/pulls/1 + RESPONSE=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL) + SHA=$(echo $RESPONSE | jq -r '.head.sha') + TARGET_BRANCH=$(echo $RESPONSE | jq -r '.base.ref') + echo "pr_sha=$SHA" >> $GITHUB_OUTPUT + echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT + echo "Retrieved PR head SHA from API: $SHA, target branch: $TARGET_BRANCH" - name: Check out code uses: actions/checkout@v4 with: @@ -49,6 +48,7 @@ jobs: OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} + TARGET_BRANCH: ${{ steps.get-pr-sha.outputs.target_branch }} run: |- cat << 'EOF' > /tmp/_workflow_core.py import os @@ -59,7 +59,7 @@ jobs: def get_diff(): result = subprocess.run( - ['git', 'diff', 'origin/main...HEAD'], + ['git', 'diff', 'origin/' + os.getenv('TARGET_BRANCH') + '...HEAD'], capture_output=True, text=True, check=True) return '\n'.join( line for line in result.stdout.split('\n') From 94e2ab7c86607505807def45ddd3c4709ac85969 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 14:09:46 +0800 Subject: [PATCH 15/19] fix(llm): accept empty choices as valid response and handle that case gracefully --- app/agent/toolcall.py | 37 +++++++++++++++++++------------------ app/llm.py | 10 +++++++--- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/app/agent/toolcall.py b/app/agent/toolcall.py index 131fd91..b9fab70 100644 --- a/app/agent/toolcall.py +++ b/app/agent/toolcall.py @@ -10,7 +10,6 @@ from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice from app.tool import CreateChatCompletion, Terminate, ToolCollection - TOOL_CALL_REQUIRED = "Tool calls required but none provided" @@ -71,40 +70,42 @@ class ToolCallAgent(ReActAgent): return False raise - self.tool_calls = response.tool_calls + self.tool_calls = tool_calls = ( + response.tool_calls if response and response.tool_calls else [] + ) + content = response.content if response and response.content else "" # Log response info - logger.info(f"✨ {self.name}'s thoughts: {response.content}") + logger.info(f"✨ {self.name}'s thoughts: {content}") logger.info( - f"🛠️ {self.name} selected {len(response.tool_calls) if response.tool_calls else 0} tools to use" + f"🛠️ {self.name} selected {len(tool_calls) if tool_calls else 0} tools to use" ) - if response.tool_calls: + if tool_calls: logger.info( - f"🧰 Tools being prepared: {[call.function.name for call in response.tool_calls]}" - ) - logger.info( - f"🔧 Tool arguments: {response.tool_calls[0].function.arguments}" + f"🧰 Tools being prepared: {[call.function.name for call in tool_calls]}" ) + logger.info(f"🔧 Tool arguments: {tool_calls[0].function.arguments}") try: + if response is None: + raise RuntimeError("No response received from the LLM") + # Handle different tool_choices modes if self.tool_choices == ToolChoice.NONE: - if response.tool_calls: + if tool_calls: logger.warning( f"🤔 Hmm, {self.name} tried to use tools when they weren't available!" ) - if response.content: - self.memory.add_message(Message.assistant_message(response.content)) + if content: + self.memory.add_message(Message.assistant_message(content)) return True return False # Create and add assistant message assistant_msg = ( - Message.from_tool_calls( - content=response.content, tool_calls=self.tool_calls - ) + Message.from_tool_calls(content=content, tool_calls=self.tool_calls) if self.tool_calls - else Message.assistant_message(response.content) + else Message.assistant_message(content) ) self.memory.add_message(assistant_msg) @@ -113,7 +114,7 @@ class ToolCallAgent(ReActAgent): # For 'auto' mode, continue with content if no commands but content exists if self.tool_choices == ToolChoice.AUTO and not self.tool_calls: - return bool(response.content) + return bool(content) return bool(self.tool_calls) except Exception as e: @@ -209,7 +210,7 @@ class ToolCallAgent(ReActAgent): return f"Error: {error_msg}" except Exception as e: error_msg = f"⚠️ Tool '{name}' encountered a problem: {str(e)}" - logger.error(error_msg) + logger.exception(error_msg) return f"Error: {error_msg}" async def _handle_special_tool(self, name: str, result: Any, **kwargs): diff --git a/app/llm.py b/app/llm.py index f99b6b1..1a4e05b 100644 --- a/app/llm.py +++ b/app/llm.py @@ -10,6 +10,7 @@ from openai import ( OpenAIError, RateLimitError, ) +from openai.types.chat.chat_completion_message import ChatCompletionMessage from tenacity import ( retry, retry_if_exception_type, @@ -653,7 +654,7 @@ class LLM: tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO, # type: ignore temperature: Optional[float] = None, **kwargs, - ): + ) -> ChatCompletionMessage | None: """ Ask LLM using functions/tools and return the response. @@ -731,12 +732,15 @@ class LLM: temperature if temperature is not None else self.temperature ) - response = await self.client.chat.completions.create(**params) + response: ChatCompletion = await self.client.chat.completions.create( + **params, stream=False + ) # Check if response is valid if not response.choices or not response.choices[0].message: print(response) - raise ValueError("Invalid or empty response from LLM") + # raise ValueError("Invalid or empty response from LLM") + return None # Update token counts self.update_token_count( From 7e3609f19fb29412d57e2556af77ebc2f968e02d Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 14:10:07 +0800 Subject: [PATCH 16/19] chore(file_operators): use utf-8 as default encoding --- app/tool/file_operators.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/tool/file_operators.py b/app/tool/file_operators.py index 61f8b16..5720172 100644 --- a/app/tool/file_operators.py +++ b/app/tool/file_operators.py @@ -42,17 +42,20 @@ class FileOperator(Protocol): class LocalFileOperator(FileOperator): """File operations implementation for local filesystem.""" + def __init__(self, encoding: str = "utf-8"): + self.encoding = encoding + async def read_file(self, path: PathLike) -> str: """Read content from a local file.""" try: - return Path(path).read_text() + return Path(path).read_text(encoding=self.encoding) except Exception as e: raise ToolError(f"Failed to read {path}: {str(e)}") from None async def write_file(self, path: PathLike, content: str) -> None: """Write content to a local file.""" try: - Path(path).write_text(content) + Path(path).write_text(content, encoding=self.encoding) except Exception as e: raise ToolError(f"Failed to write to {path}: {str(e)}") from None From d644d976b0bf0d791b96f4fe883b9544457a0836 Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 14:12:46 +0800 Subject: [PATCH 17/19] fix: pre-commit --- app/agent/toolcall.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/agent/toolcall.py b/app/agent/toolcall.py index b9fab70..76f6f01 100644 --- a/app/agent/toolcall.py +++ b/app/agent/toolcall.py @@ -10,6 +10,7 @@ from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice from app.tool import CreateChatCompletion, Terminate, ToolCollection + TOOL_CALL_REQUIRED = "Tool calls required but none provided" From b9df45bc68e8cb995b47ba9d40d75e3d47db2f8c Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 15:44:06 +0800 Subject: [PATCH 18/19] chore(app/tool): remove constructor in LocalFileOperator --- app/tool/file_operators.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/tool/file_operators.py b/app/tool/file_operators.py index 5720172..dd64c83 100644 --- a/app/tool/file_operators.py +++ b/app/tool/file_operators.py @@ -42,8 +42,7 @@ class FileOperator(Protocol): class LocalFileOperator(FileOperator): """File operations implementation for local filesystem.""" - def __init__(self, encoding: str = "utf-8"): - self.encoding = encoding + encoding: str = "utf-8" async def read_file(self, path: PathLike) -> str: """Read content from a local file.""" From 44243a1b979ed5ee11a4d88085aea439494b611e Mon Sep 17 00:00:00 2001 From: Sheng Fan Date: Wed, 19 Mar 2025 16:57:12 +0800 Subject: [PATCH 19/19] fix(browser_use_tool): reimplement screenshot logics to get JPEG data --- app/tool/browser_use_tool.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py index 7fe8f16..0158e07 100644 --- a/app/tool/browser_use_tool.py +++ b/app/tool/browser_use_tool.py @@ -1,4 +1,5 @@ import asyncio +import base64 import json from typing import Generic, Optional, TypeVar @@ -552,7 +553,16 @@ Page content: viewport_height = ctx.config.browser_window_size.get("height", 0) # Take a screenshot for the state - screenshot = await ctx.take_screenshot(full_page=True) + page = await ctx.get_current_page() + + await page.bring_to_front() + await page.wait_for_load_state() + + screenshot = await page.screenshot( + full_page=True, animations="disabled", type="jpeg", quality=100 + ) + + screenshot = base64.b64encode(screenshot).decode("utf-8") # Build the state info with all required fields state_info = {