Apply black and isort formatting
This commit is contained in:
parent
b62bf92e19
commit
47adb33bd9
@ -1,9 +1,8 @@
|
|||||||
from app.tool.search.baidu_search import BaiduSearchEngine
|
from app.tool.search.baidu_search import BaiduSearchEngine
|
||||||
from app.tool.search.base import WebSearchEngine
|
from app.tool.search.base import WebSearchEngine
|
||||||
|
from app.tool.search.bing_search import BingSearchEngine
|
||||||
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
|
from app.tool.search.duckduckgo_search import DuckDuckGoSearchEngine
|
||||||
from app.tool.search.google_search import GoogleSearchEngine
|
from app.tool.search.google_search import GoogleSearchEngine
|
||||||
from app.tool.search.bing_search import BingSearchEngine
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"WebSearchEngine",
|
"WebSearchEngine",
|
||||||
|
@ -1,22 +1,24 @@
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from app.logger import logger
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.logger import logger
|
||||||
from app.tool.search.base import WebSearchEngine
|
from app.tool.search.base import WebSearchEngine
|
||||||
|
|
||||||
ABSTRACT_MAX_LENGTH = 300
|
ABSTRACT_MAX_LENGTH = 300
|
||||||
|
|
||||||
USER_AGENTS = [
|
USER_AGENTS = [
|
||||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
|
||||||
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
|
||||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36',
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36",
|
||||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net',
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net",
|
||||||
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
|
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
|
||||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2',
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2",
|
||||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3',
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) Gecko/20070404 K-Ninja/2.1.3",
|
||||||
'Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7',
|
"Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; en-US) iNet Browser 4.7",
|
||||||
'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
|
"Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201",
|
||||||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866'
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866",
|
||||||
]
|
]
|
||||||
|
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
@ -25,7 +27,7 @@ HEADERS = {
|
|||||||
"User-Agent": USER_AGENTS[0],
|
"User-Agent": USER_AGENTS[0],
|
||||||
"Referer": "https://www.bing.com/",
|
"Referer": "https://www.bing.com/",
|
||||||
"Accept-Encoding": "gzip, deflate",
|
"Accept-Encoding": "gzip, deflate",
|
||||||
"Accept-Language": "zh-CN,zh;q=0.9"
|
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||||
}
|
}
|
||||||
|
|
||||||
BING_HOST_URL = "https://www.bing.com"
|
BING_HOST_URL = "https://www.bing.com"
|
||||||
@ -65,7 +67,9 @@ class BingSearchEngine(WebSearchEngine):
|
|||||||
next_url = BING_SEARCH_URL + query
|
next_url = BING_SEARCH_URL + query
|
||||||
|
|
||||||
while len(list_result) < num_results:
|
while len(list_result) < num_results:
|
||||||
data, next_url = self._parse_html(next_url, rank_start=len(list_result), first=first)
|
data, next_url = self._parse_html(
|
||||||
|
next_url, rank_start=len(list_result), first=first
|
||||||
|
)
|
||||||
if data:
|
if data:
|
||||||
list_result.extend([item["url"] for item in data])
|
list_result.extend([item["url"] for item in data])
|
||||||
if not next_url:
|
if not next_url:
|
||||||
@ -98,14 +102,14 @@ class BingSearchEngine(WebSearchEngine):
|
|||||||
return [], None
|
return [], None
|
||||||
|
|
||||||
for li in ol_results.find_all("li", class_="b_algo"):
|
for li in ol_results.find_all("li", class_="b_algo"):
|
||||||
title = ''
|
title = ""
|
||||||
url = ''
|
url = ""
|
||||||
abstract = ''
|
abstract = ""
|
||||||
try:
|
try:
|
||||||
h2 = li.find("h2")
|
h2 = li.find("h2")
|
||||||
if h2:
|
if h2:
|
||||||
title = h2.text.strip()
|
title = h2.text.strip()
|
||||||
url = h2.a['href'].strip()
|
url = h2.a["href"].strip()
|
||||||
|
|
||||||
p = li.find("p")
|
p = li.find("p")
|
||||||
if p:
|
if p:
|
||||||
@ -115,7 +119,14 @@ class BingSearchEngine(WebSearchEngine):
|
|||||||
abstract = abstract[:ABSTRACT_MAX_LENGTH]
|
abstract = abstract[:ABSTRACT_MAX_LENGTH]
|
||||||
|
|
||||||
rank_start += 1
|
rank_start += 1
|
||||||
list_data.append({"title": title, "abstract": abstract, "url": url, "rank": rank_start})
|
list_data.append(
|
||||||
|
{
|
||||||
|
"title": title,
|
||||||
|
"abstract": abstract,
|
||||||
|
"url": url,
|
||||||
|
"rank": rank_start,
|
||||||
|
}
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -7,10 +7,10 @@ from app.config import config
|
|||||||
from app.tool.base import BaseTool
|
from app.tool.base import BaseTool
|
||||||
from app.tool.search import (
|
from app.tool.search import (
|
||||||
BaiduSearchEngine,
|
BaiduSearchEngine,
|
||||||
|
BingSearchEngine,
|
||||||
DuckDuckGoSearchEngine,
|
DuckDuckGoSearchEngine,
|
||||||
GoogleSearchEngine,
|
GoogleSearchEngine,
|
||||||
WebSearchEngine,
|
WebSearchEngine
|
||||||
BingSearchEngine
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -38,7 +38,7 @@ class WebSearch(BaseTool):
|
|||||||
"google": GoogleSearchEngine(),
|
"google": GoogleSearchEngine(),
|
||||||
"baidu": BaiduSearchEngine(),
|
"baidu": BaiduSearchEngine(),
|
||||||
"duckduckgo": DuckDuckGoSearchEngine(),
|
"duckduckgo": DuckDuckGoSearchEngine(),
|
||||||
"bing": BingSearchEngine()
|
"bing": BingSearchEngine(),
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, query: str, num_results: int = 10) -> List[str]:
|
async def execute(self, query: str, num_results: int = 10) -> List[str]:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user