diff --git a/app/config.py b/app/config.py
index 540a8ac..cfd3f25 100644
--- a/app/config.py
+++ b/app/config.py
@@ -1,7 +1,7 @@
 import threading
 import tomllib
 from pathlib import Path
-from typing import Dict
+from typing import Dict, List, Optional
 
 from pydantic import BaseModel, Field
 
@@ -25,8 +25,30 @@ class LLMSettings(BaseModel):
     api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
 
 
+class ProxySettings(BaseModel):
+    """Proxy endpoint and optional credentials used by the browser."""
+    server: Optional[str] = Field(None, description="Proxy server address")
+    username: Optional[str] = Field(None, description="Proxy username")
+    password: Optional[str] = Field(None, description="Proxy password")
+
+
+class BrowserSettings(BaseModel):
+    """Browser launch and connection options read from the [browser] config table."""
+    headless: bool = Field(False, description="Whether to run browser in headless mode")
+    disable_security: bool = Field(True, description="Disable browser security features")
+    extra_chromium_args: List[str] = Field(default_factory=list, description="Extra arguments to pass to the browser")
+    chrome_instance_path: Optional[str] = Field(None, description="Path to a Chrome instance to use")
+    wss_url: Optional[str] = Field(None, description="Connect to a browser instance via WebSocket")
+    cdp_url: Optional[str] = Field(None, description="Connect to a browser instance via CDP")
+    proxy: Optional[ProxySettings] = Field(None, description="Proxy settings for the browser")
+
+
 class AppConfig(BaseModel):
     llm: Dict[str, LLMSettings]
+    browser_config: Optional[BrowserSettings] = Field(None, description="Browser configuration")
+
+    class Config:
+        arbitrary_types_allowed = True
 
 
 class Config:
@@ -82,6 +104,35 @@ class Config:
             "api_version": base_llm.get("api_version", ""),
         }
 
+        # Build the optional browser settings from the [browser] table.
+        browser_config = raw_config.get("browser", {})
+        browser_settings = None
+
+        if browser_config:
+            # A proxy is only configured when a server address is given.
+            proxy_config = browser_config.get("proxy", {})
+            proxy_settings = None
+
+            if proxy_config and proxy_config.get("server"):
+                proxy_settings = ProxySettings(**{
+                    k: v for k, v in proxy_config.items()
+                    if k in ProxySettings.model_fields and v
+                })
+
+            # Keep only keys that BrowserSettings declares and that have a value.
+            valid_browser_params = {
+                k: v for k, v in browser_config.items()
+                if k in BrowserSettings.model_fields and v is not None
+            }
+
+            # The validated proxy settings replace the raw [browser.proxy] table.
+            if proxy_settings:
+                valid_browser_params["proxy"] = proxy_settings
+
+            # Only create BrowserSettings when at least one valid parameter is set.
+            if valid_browser_params:
+                browser_settings = BrowserSettings(**valid_browser_params)
+
         config_dict = {
             "llm": {
                 "default": default_settings,
@@ -89,7 +140,8 @@ class Config:
                     name: {**default_settings, **override_config}
                     for name, override_config in llm_overrides.items()
                 },
-            }
+            },
+            "browser_config": browser_settings,
         }
 
         self._config = AppConfig(**config_dict)
@@ -97,6 +149,11 @@ class Config:
     @property
     def llm(self) -> Dict[str, LLMSettings]:
         return self._config.llm
+
+    @property
+    def browser_config(self) -> Optional[BrowserSettings]:
+        """Browser settings from the config file, or None when none are set."""
+        return self._config.browser_config
 
 
 config = Config()
diff --git a/app/tool/browser_use_tool.py b/app/tool/browser_use_tool.py
index e7cf9a6..22cefee 100644
--- a/app/tool/browser_use_tool.py
+++ b/app/tool/browser_use_tool.py
@@ -5,10 +5,12 @@ from typing import Optional
 from browser_use import Browser as BrowserUseBrowser
 from browser_use import BrowserConfig
 from browser_use.browser.context import BrowserContext
+from browser_use.browser.context import BrowserContextConfig
 from browser_use.dom.service import DomService
 from pydantic import Field, field_validator
 from pydantic_core.core_schema import ValidationInfo
 
+from app.config import config
 from app.tool.base import BaseTool, ToolResult
 
 MAX_LENGTH = 2000
@@ -104,10 +106,44 @@ class BrowserUseTool(BaseTool):
     async def _ensure_browser_initialized(self) -> BrowserContext:
         """Ensure browser and context are initialized."""
         if self.browser is None:
-            self.browser = BrowserUseBrowser(BrowserConfig(headless=False))
+            browser_config_kwargs = {"headless": False}
+
+            if config.browser_config:
+                from browser_use.browser.browser import ProxySettings
+
+                # Forward the proxy only when a server address is configured.
+                if config.browser_config.proxy and config.browser_config.proxy.server:
+                    browser_config_kwargs["proxy"] = ProxySettings(
+                        server=config.browser_config.proxy.server,
+                        username=config.browser_config.proxy.username,
+                        password=config.browser_config.proxy.password,
+                    )
+
+                browser_attrs = [
+                    "headless", "disable_security", "extra_chromium_args",
+                    "chrome_instance_path", "wss_url", "cdp_url"
+                ]
+
+                for attr in browser_attrs:
+                    value = getattr(config.browser_config, attr, None)
+                    # Skip unset values and empty lists.
+                    if value is not None and (not isinstance(value, list) or value):
+                        browser_config_kwargs[attr] = value
+
+            self.browser = BrowserUseBrowser(BrowserConfig(**browser_config_kwargs))
+
         if self.context is None:
-            self.context = await self.browser.new_context()
+            # Prefer a context config supplied by the app config. BrowserSettings
+            # declares no `new_context_config` field, so getattr() falls back to
+            # the library default (it also copes with browser_config being None).
+            context_config = (
+                getattr(config.browser_config, "new_context_config", None)
+                or BrowserContextConfig()
+            )
+
+            self.context = await self.browser.new_context(context_config)
             self.dom_service = DomService(await self.context.get_current_page())
+
         return self.context
 
     async def execute(
diff --git a/config/config.example.toml b/config/config.example.toml
index c1d51f0..13648dd 100644
--- a/config/config.example.toml
+++ b/config/config.example.toml
@@ -20,3 +20,25 @@ temperature = 0.0
 model = "claude-3-5-sonnet"
 base_url = "https://api.openai.com/v1"
 api_key = "sk-..."
+
+# Optional browser configuration
+# [browser]
+# Whether to run the browser in headless mode (default: false)
+#headless = false
+# Disable browser security features (default: true)
+#disable_security = true
+# Extra arguments to pass to the browser
+#extra_chromium_args = []
+# Path to a Chrome instance to use to connect to your normal browser
+# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
+#chrome_instance_path = ""
+# Connect to a browser instance via WebSocket
+#wss_url = ""
+# Connect to a browser instance via CDP
+#cdp_url = ""
+
+# Optional proxy settings for the browser
+# [browser.proxy]
+# server = "http://proxy-server:port"
+# username = "proxy-username"
+# password = "proxy-password"