feat(search): Add robust fallback system with configurable retries and enhanced error handling

    - Implement multi-engine failover system with configurable fallback order
    - Add retry logic with exponential backoff and rate-limit detection
    - Introduce search configuration options:
      * fallback_engines: Ordered list of backup search providers
      * retry_delay: Seconds between retry batches (default: 60)
      * max_retries: Maximum system-wide retry attempts (default: 3)
    - Improve error resilience with:
      - Automatic engine switching on 429/Too Many Requests
      - Full system retries after configurable cooldown periods
      - Detailed logging for diagnostics and monitoring
    - Enhance engine prioritization logic (worked example below):
      1. Primary configured engine
      2. Configured fallback engines
      3. Remaining available engines
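
    For illustration: with the example configuration below, plus a
    hypothetical fourth registered engine "bing" (invented here only to
    show step 3), the resulting order would be:

        _get_engine_order()  # -> ["google", "duckduckgo", "baidu", "bing"]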

    Example configuration:
    [search]
    engine = "Google"
    fallback_engines = ["DuckDuckGo", "Baidu"]  # Cascading fallback order
    retry_delay = 60                            # 1 minute between retry batches
    max_retries = 3                             # Attempt 3 full system retries
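
    With these values the search makes at most 4 full passes over all
    engines (1 initial pass + max_retries) and sleeps up to 3 * 60 = 180
    seconds in total between passes, not counting per-engine backoff.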

    This addresses critical reliability issues by:
    - Preventing search failures due to single-engine rate limits
    - Enabling recovery from transient network errors
    - Providing operational flexibility through configurable parameters
    - Improving visibility through granular logging (INFO/WARN/ERROR)
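
    As a worked illustration (message texts are taken from the log calls in
    the diff below; level prefixes depend on the logger setup), a
    rate-limited primary engine produces a sequence like:

        INFO  🔎 Attempting search with Google...
        WARN  ⚠️ Google search engine rate limit exceeded, trying next engine...
        INFO  🔎 Attempting search with Duckduckgo...
        INFO  Search successful with Duckduckgo after trying: Google
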
Johan Holm 2025-03-17 10:43:42 +01:00
parent 05e41a86ed
commit 711c2805e4
3 changed files with 88 additions and 4 deletions


@@ -37,6 +37,18 @@ class ProxySettings(BaseModel):

 class SearchSettings(BaseModel):
     engine: str = Field(default="Google", description="Search engine the llm to use")
+    fallback_engines: List[str] = Field(
+        default_factory=lambda: ["DuckDuckGo", "Baidu"],
+        description="Fallback search engines to try if the primary engine fails",
+    )
+    retry_delay: int = Field(
+        default=60,
+        description="Seconds to wait before retrying all engines again after they all fail",
+    )
+    max_retries: int = Field(
+        default=3,
+        description="Maximum number of times to retry all engines when all fail",
+    )


 class BrowserSettings(BaseModel):
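
A quick sketch of how these settings behave on their own (the import path is
an assumption; the point is that default_factory builds a fresh list per
instance, so mutating one instance's fallbacks cannot leak into another):

    from app.config import SearchSettings  # import path assumed

    s = SearchSettings()
    assert s.engine == "Google"
    assert s.fallback_engines == ["DuckDuckGo", "Baidu"]

    # default_factory gives every instance its own list object
    s.fallback_engines.append("Bing")
    assert SearchSettings().fallback_engines == ["DuckDuckGo", "Baidu"]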


@@ -2,6 +2,7 @@ import asyncio
 from typing import List

 from tenacity import retry, stop_after_attempt, wait_exponential

+from app.logger import logger
 from app.config import config
 from app.tool.base import BaseTool
@@ -42,6 +43,8 @@ class WebSearch(BaseTool):
     async def execute(self, query: str, num_results: int = 10) -> List[str]:
         """
         Execute a Web search and return a list of URLs.
+        Tries engines in order based on configuration, falling back if an engine fails with errors.
+        If all engines fail, it will wait and retry up to the configured number of times.

         Args:
             query (str): The search query to submit to the search engine.
@@ -50,37 +53,100 @@ class WebSearch(BaseTool):

         Returns:
             List[str]: A list of URLs matching the search query.
         """
+        # Get retry settings from config
+        retry_delay = 60  # Default to 60 seconds
+        max_retries = 3  # Default to 3 retries
+        if config.search_config:
+            retry_delay = getattr(config.search_config, "retry_delay", 60)
+            max_retries = getattr(config.search_config, "max_retries", 3)
+
+        # Try searching with retries when all engines fail
+        for retry_count in range(max_retries + 1):  # +1 because first try is not a retry
+            links = await self._try_all_engines(query, num_results)
+            if links:
+                return links
+
+            if retry_count < max_retries:
+                # All engines failed, wait and retry
+                logger.warning(f"All search engines failed. Waiting {retry_delay} seconds before retry {retry_count + 1}/{max_retries}...")
+                await asyncio.sleep(retry_delay)
+            else:
+                logger.error(f"All search engines failed after {max_retries} retries. Giving up.")
+
+        return []
+
+    async def _try_all_engines(self, query: str, num_results: int) -> List[str]:
+        """
+        Try all search engines in the configured order.
+
+        Args:
+            query (str): The search query to submit to the search engine.
+            num_results (int): The number of search results to return.
+
+        Returns:
+            List[str]: A list of URLs matching the search query, or an empty list if all engines fail.
+        """
         engine_order = self._get_engine_order()
+        failed_engines = []
+
         for engine_name in engine_order:
             engine = self._search_engine[engine_name]
             try:
+                logger.info(f"🔎 Attempting search with {engine_name.capitalize()}...")
                 links = await self._perform_search_with_engine(
                     engine, query, num_results
                 )
                 if links:
+                    if failed_engines:
+                        logger.info(f"Search successful with {engine_name.capitalize()} after trying: {', '.join(failed_engines)}")
                     return links
             except Exception as e:
-                print(f"Search engine '{engine_name}' failed with error: {e}")
+                failed_engines.append(engine_name.capitalize())
+                is_rate_limit = "429" in str(e) or "Too Many Requests" in str(e)
+
+                if is_rate_limit:
+                    logger.warning(f"⚠️ {engine_name.capitalize()} search engine rate limit exceeded, trying next engine...")
+                else:
+                    logger.warning(f"⚠️ {engine_name.capitalize()} search failed with error: {e}")
+
+        if failed_engines:
+            logger.error(f"All search engines failed: {', '.join(failed_engines)}")
         return []

     def _get_engine_order(self) -> List[str]:
         """
         Determines the order in which to try search engines.
-        Preferred engine is first (based on configuration), followed by the remaining engines.
+        Preferred engine is first (based on configuration), followed by fallback engines,
+        and then the remaining engines.

         Returns:
             List[str]: Ordered list of search engine names.
         """
         preferred = "google"
-        if config.search_config and config.search_config.engine:
-            preferred = config.search_config.engine.lower()
+        fallbacks = []
+
+        if config.search_config:
+            if config.search_config.engine:
+                preferred = config.search_config.engine.lower()
+            if config.search_config.fallback_engines:
+                fallbacks = [engine.lower() for engine in config.search_config.fallback_engines]

         engine_order = []
+
+        # Add preferred engine first
         if preferred in self._search_engine:
             engine_order.append(preferred)
+
+        # Add configured fallback engines in order
+        for fallback in fallbacks:
+            if fallback in self._search_engine and fallback not in engine_order:
+                engine_order.append(fallback)
+
+        # Add any remaining engines
         for key in self._search_engine:
             if key not in engine_order:
                 engine_order.append(key)
+
         return engine_order

     @retry(
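
The hunk above is truncated at the decorator for _perform_search_with_engine.
For orientation, a tenacity decorator of this shape adds per-engine retries
with exponential backoff underneath the engine-level fallback; the stop/wait
values and the engine interface in this sketch are assumptions, not taken
from the diff:

    @retry(
        stop=stop_after_attempt(3),  # assumed: up to 3 attempts per engine
        wait=wait_exponential(multiplier=1, min=1, max=10),  # assumed: 1s, 2s, 4s, capped at 10s
    )
    async def _perform_search_with_engine(self, engine, query: str, num_results: int) -> List[str]:
        loop = asyncio.get_event_loop()
        # engine.perform_search(...) is an assumed interface of the engine objects
        return await loop.run_in_executor(
            None, lambda: list(engine.perform_search(query, num_results=num_results))
        )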


@@ -65,3 +65,9 @@ temperature = 0.0        # Controls randomness for vision mod

 # [search]
 # Search engine for agent to use. Default is "Google", can be set to "Baidu" or "DuckDuckGo".
 #engine = "Google"
+# Fallback engine order. Default is ["DuckDuckGo", "Baidu"] - will try in this order after primary engine fails.
+#fallback_engines = ["DuckDuckGo", "Baidu"]
+# Seconds to wait before retrying all engines again when they all fail due to rate limits. Default is 60.
+#retry_delay = 60
+# Maximum number of times to retry all engines when all fail. Default is 3.
+#max_retries = 3
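
End to end, callers only touch the execute() entry point; fallback ordering
and cooldown retries happen internally. A minimal usage sketch, assuming the
tool lives at app/tool/web_search.py:

    import asyncio

    from app.tool.web_search import WebSearch  # module path assumed

    async def main() -> None:
        links = await WebSearch().execute(query="tenacity retry example", num_results=5)
        print("\n".join(links) or "all engines failed")  # empty list once retries are exhausted

    asyncio.run(main())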