Apply black and isort formatting

This commit is contained in:
via 2025-03-19 09:13:47 +08:00
parent b62bf92e19
commit 47adb33bd9
3 changed files with 35 additions and 25 deletions

View File

@ -1,9 +1,8 @@
from app.tool.search.baidu_search import BaiduSearchEngine from app.tool.search.baidu_search import BaiduSearchEngine
from app.tool.search.base import WebSearchEngine from app.tool.search.base import WebSearchEngine
from app.tool.search.bing_search import BingSearchEngine
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
from app.tool.search.google_search import GoogleSearchEngine from app.tool.search.google_search import GoogleSearchEngine
from app.tool.search.bing_search import BingSearchEngine
__all__ = [ __all__ = [
"WebSearchEngine", "WebSearchEngine",

View File

@ -1,22 +1,24 @@
from typing import List from typing import List
import requests import requests
from app.logger import logger
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from app.logger import logger
from app.tool.search.base import WebSearchEngine from app.tool.search.base import WebSearchEngine
ABSTRACT_MAX_LENGTH = 300 ABSTRACT_MAX_LENGTH = 300
USER_AGENTS = [ USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36",
'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net', "Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net",
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2', "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2",
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3', "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3",
'Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7', "Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7",
'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201', "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201",
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866' "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866",
] ]
HEADERS = { HEADERS = {
@ -25,7 +27,7 @@ HEADERS = {
"User-Agent": USER_AGENTS[0], "User-Agent": USER_AGENTS[0],
"Referer": "https://www.bing.com/", "Referer": "https://www.bing.com/",
"Accept-Encoding": "gzip, deflate", "Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9" "Accept-Language": "zh-CN,zh;q=0.9",
} }
BING_HOST_URL = "https://www.bing.com" BING_HOST_URL = "https://www.bing.com"
@ -65,7 +67,9 @@ class BingSearchEngine(WebSearchEngine):
next_url = BING_SEARCH_URL + query next_url = BING_SEARCH_URL + query
while len(list_result) < num_results: while len(list_result) < num_results:
data, next_url = self._parse_html(next_url, rank_start=len(list_result), first=first) data, next_url = self._parse_html(
next_url, rank_start=len(list_result), first=first
)
if data: if data:
list_result.extend([item["url"] for item in data]) list_result.extend([item["url"] for item in data])
if not next_url: if not next_url:
@ -98,14 +102,14 @@ class BingSearchEngine(WebSearchEngine):
return [], None return [], None
for li in ol_results.find_all("li", class_="b_algo"): for li in ol_results.find_all("li", class_="b_algo"):
title = '' title = ""
url = '' url = ""
abstract = '' abstract = ""
try: try:
h2 = li.find("h2") h2 = li.find("h2")
if h2: if h2:
title = h2.text.strip() title = h2.text.strip()
url = h2.a['href'].strip() url = h2.a["href"].strip()
p = li.find("p") p = li.find("p")
if p: if p:
@ -115,7 +119,14 @@ class BingSearchEngine(WebSearchEngine):
abstract = abstract[:ABSTRACT_MAX_LENGTH] abstract = abstract[:ABSTRACT_MAX_LENGTH]
rank_start += 1 rank_start += 1
list_data.append({"title": title, "abstract": abstract, "url": url, "rank": rank_start}) list_data.append(
{
"title": title,
"abstract": abstract,
"url": url,
"rank": rank_start,
}
)
except Exception: except Exception:
continue continue
@ -131,4 +142,4 @@ class BingSearchEngine(WebSearchEngine):
def perform_search(self, query, num_results=10, *args, **kwargs): def perform_search(self, query, num_results=10, *args, **kwargs):
"""Bing search engine.""" """Bing search engine."""
return self._search_sync(query, num_results=num_results) return self._search_sync(query, num_results=num_results)

View File

@ -6,11 +6,11 @@ from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import config from app.config import config
from app.tool.base import BaseTool from app.tool.base import BaseTool
from app.tool.search import ( from app.tool.search import (
BaiduSearchEngine, BaiduSearchEngine,
BingSearchEngine,
DuckDuckGoSearchEngine, DuckDuckGoSearchEngine,
GoogleSearchEngine, GoogleSearchEngine,
WebSearchEngine, WebSearchEngine,
BingSearchEngine
) )
@ -38,7 +38,7 @@ class WebSearch(BaseTool):
"google": GoogleSearchEngine(), "google": GoogleSearchEngine(),
"baidu": BaiduSearchEngine(), "baidu": BaiduSearchEngine(),
"duckduckgo": DuckDuckGoSearchEngine(), "duckduckgo": DuckDuckGoSearchEngine(),
"bing": BingSearchEngine() "bing": BingSearchEngine(),
} }
async def execute(self, query: str, num_results: int = 10) -> List[str]: async def execute(self, query: str, num_results: int = 10) -> List[str]: