Merge branch 'refs/heads/main' into mcp
This commit is contained in:
commit
167b1acd5c
@ -25,7 +25,7 @@ class LLMSettings(BaseModel):
|
||||
description="Maximum input tokens to use across all requests (None for unlimited)",
|
||||
)
|
||||
temperature: float = Field(1.0, description="Sampling temperature")
|
||||
api_type: str = Field(..., description="AzureOpenai or Openai")
|
||||
api_type: str = Field(..., description="Azure, Openai, or Ollama")
|
||||
api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
|
||||
|
||||
|
||||
|
145
app/llm.py
145
app/llm.py
@ -30,6 +30,14 @@ from app.schema import (
|
||||
|
||||
|
||||
REASONING_MODELS = ["o1", "o3-mini"]
|
||||
MULTIMODAL_MODELS = [
|
||||
"gpt-4-vision-preview",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-3-haiku-20240307",
|
||||
]
|
||||
|
||||
|
||||
class TokenCounter:
|
||||
@ -259,12 +267,15 @@ class LLM:
|
||||
return "Token limit exceeded"
|
||||
|
||||
@staticmethod
|
||||
def format_messages(messages: List[Union[dict, Message]]) -> List[dict]:
|
||||
def format_messages(
|
||||
messages: List[Union[dict, Message]], supports_images: bool = False
|
||||
) -> List[dict]:
|
||||
"""
|
||||
Format messages for LLM by converting them to OpenAI message format.
|
||||
|
||||
Args:
|
||||
messages: List of messages that can be either dict or Message objects
|
||||
supports_images: Flag indicating if the target model supports image inputs
|
||||
|
||||
Returns:
|
||||
List[dict]: List of formatted messages in OpenAI format
|
||||
@ -288,54 +299,58 @@ class LLM:
|
||||
if isinstance(message, Message):
|
||||
message = message.to_dict()
|
||||
|
||||
if not isinstance(message, dict):
|
||||
if isinstance(message, dict):
|
||||
# If message is a dict, ensure it has required fields
|
||||
if "role" not in message:
|
||||
raise ValueError("Message dict must contain 'role' field")
|
||||
|
||||
# Process base64 images if present and model supports images
|
||||
if supports_images and message.get("base64_image"):
|
||||
# Initialize or convert content to appropriate format
|
||||
if not message.get("content"):
|
||||
message["content"] = []
|
||||
elif isinstance(message["content"], str):
|
||||
message["content"] = [
|
||||
{"type": "text", "text": message["content"]}
|
||||
]
|
||||
elif isinstance(message["content"], list):
|
||||
# Convert string items to proper text objects
|
||||
message["content"] = [
|
||||
(
|
||||
{"type": "text", "text": item}
|
||||
if isinstance(item, str)
|
||||
else item
|
||||
)
|
||||
for item in message["content"]
|
||||
]
|
||||
|
||||
# Add the image to content
|
||||
message["content"].append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{message['base64_image']}"
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Remove the base64_image field
|
||||
del message["base64_image"]
|
||||
# If model doesn't support images but message has base64_image, handle gracefully
|
||||
elif not supports_images and message.get("base64_image"):
|
||||
# Just remove the base64_image field and keep the text content
|
||||
del message["base64_image"]
|
||||
|
||||
if "content" in message or "tool_calls" in message:
|
||||
formatted_messages.append(message)
|
||||
# else: do not include the message
|
||||
else:
|
||||
raise TypeError(f"Unsupported message type: {type(message)}")
|
||||
|
||||
# Validate required fields
|
||||
if "role" not in message:
|
||||
raise ValueError("Message dict must contain 'role' field")
|
||||
|
||||
# Process base64 images if present
|
||||
if message.get("base64_image"):
|
||||
# Initialize or convert content to appropriate format
|
||||
if not message.get("content"):
|
||||
message["content"] = []
|
||||
elif isinstance(message["content"], str):
|
||||
message["content"] = [{"type": "text", "text": message["content"]}]
|
||||
elif isinstance(message["content"], list):
|
||||
# Convert string items to proper text objects
|
||||
message["content"] = [
|
||||
(
|
||||
{"type": "text", "text": item}
|
||||
if isinstance(item, str)
|
||||
else item
|
||||
)
|
||||
for item in message["content"]
|
||||
]
|
||||
|
||||
# Add the image to content
|
||||
message["content"].append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{message['base64_image']}"
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Remove the base64_image field
|
||||
del message["base64_image"]
|
||||
|
||||
# Only include messages with content or tool_calls
|
||||
if "content" in message or "tool_calls" in message:
|
||||
formatted_messages.append(message)
|
||||
|
||||
# Validate all roles
|
||||
invalid_roles = [
|
||||
msg for msg in formatted_messages if msg["role"] not in ROLE_VALUES
|
||||
]
|
||||
if invalid_roles:
|
||||
raise ValueError(f"Invalid role: {invalid_roles[0]['role']}")
|
||||
# Validate all messages have required fields
|
||||
for msg in formatted_messages:
|
||||
if msg["role"] not in ROLE_VALUES:
|
||||
raise ValueError(f"Invalid role: {msg['role']}")
|
||||
|
||||
return formatted_messages
|
||||
|
||||
@ -372,12 +387,15 @@ class LLM:
|
||||
Exception: For unexpected errors
|
||||
"""
|
||||
try:
|
||||
# Format system and user messages
|
||||
# Check if the model supports images
|
||||
supports_images = self.model in MULTIMODAL_MODELS
|
||||
|
||||
# Format system and user messages with image support check
|
||||
if system_msgs:
|
||||
system_msgs = self.format_messages(system_msgs)
|
||||
messages = system_msgs + self.format_messages(messages)
|
||||
system_msgs = self.format_messages(system_msgs, supports_images)
|
||||
messages = system_msgs + self.format_messages(messages, supports_images)
|
||||
else:
|
||||
messages = self.format_messages(messages)
|
||||
messages = self.format_messages(messages, supports_images)
|
||||
|
||||
# Calculate input token count
|
||||
input_tokens = self.count_message_tokens(messages)
|
||||
@ -499,8 +517,15 @@ class LLM:
|
||||
Exception: For unexpected errors
|
||||
"""
|
||||
try:
|
||||
# Format messages
|
||||
formatted_messages = self.format_messages(messages)
|
||||
# For ask_with_images, we always set supports_images to True because
|
||||
# this method should only be called with models that support images
|
||||
if self.model not in MULTIMODAL_MODELS:
|
||||
raise ValueError(
|
||||
f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}"
|
||||
)
|
||||
|
||||
# Format messages with image support
|
||||
formatted_messages = self.format_messages(messages, supports_images=True)
|
||||
|
||||
# Ensure the last message is from the user to attach images
|
||||
if not formatted_messages or formatted_messages[-1]["role"] != "user":
|
||||
@ -539,7 +564,10 @@ class LLM:
|
||||
|
||||
# Add system messages if provided
|
||||
if system_msgs:
|
||||
all_messages = self.format_messages(system_msgs) + formatted_messages
|
||||
all_messages = (
|
||||
self.format_messages(system_msgs, supports_images=True)
|
||||
+ formatted_messages
|
||||
)
|
||||
else:
|
||||
all_messages = formatted_messages
|
||||
|
||||
@ -653,12 +681,15 @@ class LLM:
|
||||
if tool_choice not in TOOL_CHOICE_VALUES:
|
||||
raise ValueError(f"Invalid tool_choice: {tool_choice}")
|
||||
|
||||
# Check if the model supports images
|
||||
supports_images = self.model in MULTIMODAL_MODELS
|
||||
|
||||
# Format messages
|
||||
if system_msgs:
|
||||
system_msgs = self.format_messages(system_msgs)
|
||||
messages = system_msgs + self.format_messages(messages)
|
||||
system_msgs = self.format_messages(system_msgs, supports_images)
|
||||
messages = system_msgs + self.format_messages(messages, supports_images)
|
||||
else:
|
||||
messages = self.format_messages(messages)
|
||||
messages = self.format_messages(messages, supports_images)
|
||||
|
||||
# Calculate input token count
|
||||
input_tokens = self.count_message_tokens(messages)
|
||||
|
@ -418,17 +418,7 @@ class BrowserUseTool(BaseTool, Generic[Context]):
|
||||
|
||||
# Create prompt for LLM
|
||||
prompt_text = """
|
||||
Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page.
|
||||
|
||||
Examples of extraction goals:
|
||||
- Extract all company names
|
||||
- Extract specific descriptions
|
||||
- Extract all information about a topic
|
||||
- Extract links with companies in structured format
|
||||
- Extract all links
|
||||
|
||||
If the goal is vague, summarize the page. Respond in JSON format.
|
||||
|
||||
Your task is to extract the content of the page. You will be given a page and a goal, and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format.
|
||||
Extraction goal: {goal}
|
||||
|
||||
Page content:
|
||||
@ -445,10 +435,54 @@ Page content:
|
||||
|
||||
messages = [Message.user_message(formatted_prompt)]
|
||||
|
||||
# Use LLM to extract content based on the goal
|
||||
response = await self.llm.ask(messages)
|
||||
# Define extraction function for the tool
|
||||
extraction_function = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "extract_content",
|
||||
"description": "Extract specific information from a webpage based on a goal",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"extracted_content": {
|
||||
"type": "object",
|
||||
"description": "The content extracted from the page according to the goal",
|
||||
}
|
||||
},
|
||||
"required": ["extracted_content"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# Use LLM to extract content with required function calling
|
||||
response = await self.llm.ask_tool(
|
||||
messages,
|
||||
tools=[extraction_function],
|
||||
tool_choice="required",
|
||||
)
|
||||
|
||||
# Extract content from function call response
|
||||
if (
|
||||
response
|
||||
and response.tool_calls
|
||||
and len(response.tool_calls) > 0
|
||||
):
|
||||
# Get the first tool call arguments
|
||||
tool_call = response.tool_calls[0]
|
||||
# Parse the JSON arguments
|
||||
try:
|
||||
args = json.loads(tool_call.function.arguments)
|
||||
extracted_content = args.get("extracted_content", {})
|
||||
# Format extracted content as JSON string
|
||||
content_json = json.dumps(
|
||||
extracted_content, indent=2, ensure_ascii=False
|
||||
)
|
||||
msg = f"Extracted from page:\n{content_json}\n"
|
||||
except Exception as e:
|
||||
msg = f"Error parsing extraction result: {str(e)}\nRaw response: {tool_call.function.arguments}"
|
||||
else:
|
||||
msg = "No content was extracted from the page."
|
||||
|
||||
msg = f"Extracted from page:\n{response}\n"
|
||||
return ToolResult(output=msg)
|
||||
except Exception as e:
|
||||
# Provide a more helpful error message
|
||||
|
@ -1,5 +1,6 @@
|
||||
from app.tool.search.baidu_search import BaiduSearchEngine
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
from app.tool.search.bing_search import BingSearchEngine
|
||||
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
|
||||
from app.tool.search.google_search import GoogleSearchEngine
|
||||
|
||||
@ -9,4 +10,5 @@ __all__ = [
|
||||
"BaiduSearchEngine",
|
||||
"DuckDuckGoSearchEngine",
|
||||
"GoogleSearchEngine",
|
||||
"BingSearchEngine",
|
||||
]
|
||||
|
146
app/tool/search/bing_search.py
Normal file
146
app/tool/search/bing_search.py
Normal file
@ -0,0 +1,146 @@
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.logger import logger
|
||||
from app.tool.search.base import WebSearchEngine
|
||||
|
||||
|
||||
# Maximum number of characters kept from a result's abstract; longer
# abstracts are truncated to this length when result pages are parsed.
ABSTRACT_MAX_LENGTH = 300
|
||||
|
||||
# Candidate User-Agent strings. In the code shown here only USER_AGENTS[0]
# is referenced (by HEADERS below); the other entries are not rotated.
USER_AGENTS = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
|
||||
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3",
|
||||
"Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866",
|
||||
]
|
||||
|
||||
# Default HTTP headers; BingSearchEngine.__init__ installs them on its
# requests session, so they are sent with every request the engine makes.
# NOTE(review): Accept-Language is zh-CN while the parser looks for an anchor
# titled "Next page" — confirm Bing still serves an English title here.
HEADERS = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"User-Agent": USER_AGENTS[0],
|
||||
"Referer": "https://www.bing.com/",
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
}
|
||||
|
||||
# Base URL used to turn the next-page link's href into an absolute URL.
BING_HOST_URL = "https://www.bing.com"
|
||||
# Search endpoint prefix; the query string is appended directly after "q=".
BING_SEARCH_URL = "https://www.bing.com/search?q="
|
||||
|
||||
|
||||
class BingSearchEngine(WebSearchEngine):
    """Search engine backend that scrapes Bing's HTML result pages.

    A single `requests.Session` (configured with the module-level HEADERS)
    is created per instance and reused for every page fetch.
    """

    # Populated in __init__; declared here so the base class knows the field.
    session: requests.Session = None

    def __init__(self, **data):
        """Initialize the BingSearch tool with a requests session."""
        super().__init__(**data)
        self.session = requests.Session()
        self.session.headers.update(HEADERS)

    def _search_sync(self, query: str, num_results: int = 10) -> List[str]:
        """
        Synchronous Bing search implementation to retrieve a list of URLs matching a query.

        Args:
            query (str): The search query to submit to Bing. Must not be empty.
            num_results (int, optional): The maximum number of URLs to return. Defaults to 10.

        Returns:
            List[str]: A list of URLs from the search results, capped at `num_results`.
                Returns an empty list if the query is empty/blank or no results are found.

        Notes:
            - The query is percent-encoded before being appended to BING_SEARCH_URL,
              so characters such as `&`, `#`, `+` and spaces are sent literally.
            - Pagination follows the `next_url` returned by `_parse_html`.
            - Paging stops as soon as a page yields no results, so a result-less
              page that still advertises a "next" link cannot cause an
              unbounded series of requests.
            - If fewer results than `num_results` are available, all found URLs are returned.
        """
        from urllib.parse import quote_plus

        if not query or not query.strip():
            return []

        found_urls: List[str] = []
        first = 1
        next_url = BING_SEARCH_URL + quote_plus(query)

        while next_url and len(found_urls) < num_results:
            page_items, next_url = self._parse_html(
                next_url, rank_start=len(found_urls), first=first
            )
            if not page_items:
                # Nothing usable on this page: stop instead of chasing further
                # "next" links that are unlikely to produce results.
                break
            found_urls.extend(item["url"] for item in page_items)
            first += 10

        return found_urls[:num_results]

    def _parse_html(
        self, url: str, rank_start: int = 0, first: int = 1, timeout: float = 10.0
    ) -> tuple:
        """
        Fetch one Bing result page and extract its results and the next page URL.

        Args:
            url (str): The URL of the Bing search results page to fetch and parse.
            rank_start (int, optional): Rank of the last result already collected;
                results on this page are numbered from `rank_start + 1`. Defaults to 0.
            first (int, optional): Unused; kept so existing callers that pass it
                keep working. Defaults to 1.
            timeout (float, optional): Seconds to wait for the HTTP response before
                giving up. Defaults to 10.0.

        Returns:
            tuple: A tuple containing:
                - list: A list of dictionaries with keys 'title', 'abstract', 'url', and 'rank'
                  for each result that has a non-empty link.
                - str or None: The URL of the next results page, or None if there is no next page.
            On any fetch or parse error a warning is logged and `([], None)` is returned.
        """
        from urllib.parse import urljoin

        try:
            # A timeout keeps a stalled connection from blocking the caller forever.
            res = self.session.get(url=url, timeout=timeout)
            res.raise_for_status()
            res.encoding = "utf-8"
            root = BeautifulSoup(res.text, "lxml")

            ol_results = root.find("ol", id="b_results")
            if not ol_results:
                return [], None

            list_data = []
            for li in ol_results.find_all("li", class_="b_algo"):
                h2 = li.find("h2")
                anchor = h2.find("a") if h2 else None
                result_url = (anchor.get("href") or "").strip() if anchor else ""
                if not result_url:
                    # An entry without a link is useless to callers, which only
                    # consume the 'url' field.
                    continue

                title = h2.text.strip()

                p = li.find("p")
                abstract = p.text.strip() if p else ""
                if ABSTRACT_MAX_LENGTH and len(abstract) > ABSTRACT_MAX_LENGTH:
                    abstract = abstract[:ABSTRACT_MAX_LENGTH]

                rank_start += 1
                list_data.append(
                    {
                        "title": title,
                        "abstract": abstract,
                        "url": result_url,
                        "rank": rank_start,
                    }
                )

            # NOTE(review): HEADERS requests zh-CN content; confirm Bing still
            # titles the pagination anchor "Next page" for that locale.
            next_btn = root.find("a", title="Next page")
            next_href = next_btn.get("href") if next_btn else None
            if not next_href:
                # Keep the results already parsed even when there is no usable
                # next-page link.
                return list_data, None

            # urljoin handles both relative and already-absolute hrefs.
            return list_data, urljoin(BING_HOST_URL, next_href)
        except Exception as e:
            # Best-effort scraping boundary: report the failure and return no results.
            logger.warning(f"Error parsing HTML: {e}")
            return [], None

    def perform_search(self, query, num_results=10, *args, **kwargs):
        """Run a Bing search and return up to `num_results` result URLs.

        Extra positional and keyword arguments are accepted and ignored.
        """
        return self._search_sync(query, num_results=num_results)
|
@ -7,6 +7,7 @@ from app.config import config
|
||||
from app.tool.base import BaseTool
|
||||
from app.tool.search import (
|
||||
BaiduSearchEngine,
|
||||
BingSearchEngine,
|
||||
DuckDuckGoSearchEngine,
|
||||
GoogleSearchEngine,
|
||||
WebSearchEngine,
|
||||
@ -37,6 +38,7 @@ class WebSearch(BaseTool):
|
||||
"google": GoogleSearchEngine(),
|
||||
"baidu": BaiduSearchEngine(),
|
||||
"duckduckgo": DuckDuckGoSearchEngine(),
|
||||
"bing": BingSearchEngine(),
|
||||
}
|
||||
|
||||
async def execute(self, query: str, num_results: int = 10) -> List[str]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user