Merge pull request #465 from nezhazheng/main

Support configuring all BrowserConfig parameters within browser-use.
This commit is contained in:
mannaandpoem 2025-03-12 20:26:14 +08:00 committed by GitHub
commit cfdeb3ad4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 116 additions and 4 deletions

View File

@ -1,7 +1,7 @@
import threading import threading
import tomllib import tomllib
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict, List, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -25,8 +25,28 @@ class LLMSettings(BaseModel):
api_version: str = Field(..., description="Azure Openai version if AzureOpenai") api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
class ProxySettings(BaseModel):
    """Proxy settings read from the ``[browser.proxy]`` table of the config.

    Every field is optional and defaults to ``None``.
    """

    # Annotated Optional so the type agrees with the ``None`` default; the
    # original ``str`` annotation rejected an explicit ``server=None``.
    server: Optional[str] = Field(None, description="Proxy server address")
    username: Optional[str] = Field(None, description="Proxy username")
    password: Optional[str] = Field(None, description="Proxy password")
class BrowserSettings(BaseModel):
    """Browser options read from the ``[browser]`` table of the config.

    Each field carries its own default, so every option may be omitted.
    """

    headless: bool = Field(
        default=False,
        description="Whether to run browser in headless mode",
    )
    disable_security: bool = Field(
        default=True,
        description="Disable browser security features",
    )
    extra_chromium_args: List[str] = Field(
        default_factory=list,
        description="Extra arguments to pass to the browser",
    )
    chrome_instance_path: Optional[str] = Field(
        default=None,
        description="Path to a Chrome instance to use",
    )
    wss_url: Optional[str] = Field(
        default=None,
        description="Connect to a browser instance via WebSocket",
    )
    cdp_url: Optional[str] = Field(
        default=None,
        description="Connect to a browser instance via CDP",
    )
    proxy: Optional[ProxySettings] = Field(
        default=None,
        description="Proxy settings for the browser",
    )
class AppConfig(BaseModel):
    """Validated application configuration: LLM settings plus browser settings."""

    # LLM settings keyed by profile name.
    llm: Dict[str, LLMSettings]
    # None when no browser settings were supplied.
    browser_config: Optional[BrowserSettings] = Field(
        default=None,
        description="Browser configuration",
    )

    class Config:
        # pydantic model option: accept field types pydantic cannot validate itself.
        arbitrary_types_allowed = True
class Config: class Config:
@ -82,6 +102,35 @@ class Config:
"api_version": base_llm.get("api_version", ""), "api_version": base_llm.get("api_version", ""),
} }
# handle browser config.
browser_config = raw_config.get("browser", {})
browser_settings = None
if browser_config:
# handle proxy settings.
proxy_config = browser_config.get("proxy", {})
proxy_settings = None
if proxy_config and proxy_config.get("server"):
proxy_settings = ProxySettings(**{
k: v for k, v in proxy_config.items()
if k in ["server", "username", "password"] and v
})
# filter valid browser config parameters.
valid_browser_params = {
k: v for k, v in browser_config.items()
if k in BrowserSettings.__annotations__ and v is not None
}
# if there is proxy settings, add it to the parameters.
if proxy_settings:
valid_browser_params["proxy"] = proxy_settings
# only create BrowserSettings when there are valid parameters.
if valid_browser_params:
browser_settings = BrowserSettings(**valid_browser_params)
config_dict = { config_dict = {
"llm": { "llm": {
"default": default_settings, "default": default_settings,
@ -89,7 +138,8 @@ class Config:
name: {**default_settings, **override_config} name: {**default_settings, **override_config}
for name, override_config in llm_overrides.items() for name, override_config in llm_overrides.items()
}, },
} },
"browser_config": browser_settings,
} }
self._config = AppConfig(**config_dict) self._config = AppConfig(**config_dict)
@ -98,5 +148,9 @@ class Config:
def llm(self) -> Dict[str, LLMSettings]: def llm(self) -> Dict[str, LLMSettings]:
return self._config.llm return self._config.llm
@property
def browser_config(self) -> Optional[BrowserSettings]:
    """Return the ``browser_config`` value of the loaded application config."""
    app_config = self._config
    return app_config.browser_config
config = Config() config = Config()

View File

@ -5,10 +5,12 @@ from typing import Optional
from browser_use import Browser as BrowserUseBrowser from browser_use import Browser as BrowserUseBrowser
from browser_use import BrowserConfig from browser_use import BrowserConfig
from browser_use.browser.context import BrowserContext from browser_use.browser.context import BrowserContext
from browser_use.browser.context import BrowserContextConfig
from browser_use.dom.service import DomService from browser_use.dom.service import DomService
from pydantic import Field, field_validator from pydantic import Field, field_validator
from pydantic_core.core_schema import ValidationInfo from pydantic_core.core_schema import ValidationInfo
from app.config import config
from app.tool.base import BaseTool, ToolResult from app.tool.base import BaseTool, ToolResult
MAX_LENGTH = 2000 MAX_LENGTH = 2000
@ -104,10 +106,44 @@ class BrowserUseTool(BaseTool):
async def _ensure_browser_initialized(self) -> BrowserContext:
    """Create the browser and its context on first use and return the context.

    The browser is built from ``config.browser_config`` when that is set;
    otherwise only ``headless=False`` is passed to ``BrowserConfig``.
    """
    if self.browser is None:
        launch_options = {"headless": False}
        settings = config.browser_config

        if settings:
            # browser_use's own ProxySettings, not the app config model.
            from browser_use.browser.browser import ProxySettings

            proxy = settings.proxy
            if proxy and proxy.server:
                launch_options["proxy"] = ProxySettings(
                    server=proxy.server,
                    username=proxy.username,
                    password=proxy.password,
                )

            for option in (
                "headless",
                "disable_security",
                "extra_chromium_args",
                "chrome_instance_path",
                "wss_url",
                "cdp_url",
            ):
                value = getattr(settings, option, None)
                # Skip unset options and empty lists.
                if value is None or (isinstance(value, list) and not value):
                    continue
                launch_options[option] = value

        self.browser = BrowserUseBrowser(BrowserConfig(**launch_options))

    if self.context is None:
        settings = config.browser_config
        context_config = BrowserContextConfig()

        # Use a context config from the settings object when it has a truthy one.
        override = getattr(settings, "new_context_config", None) if settings else None
        if override:
            context_config = override

        self.context = await self.browser.new_context(context_config)
        self.dom_service = DomService(await self.context.get_current_page())

    return self.context
async def execute( async def execute(

View File

@ -20,3 +20,25 @@ temperature = 0.0
model = "claude-3-5-sonnet" model = "claude-3-5-sonnet"
base_url = "https://api.openai.com/v1" base_url = "https://api.openai.com/v1"
api_key = "sk-..." api_key = "sk-..."
# Optional browser configuration
# [browser]
# Whether to run browser in headless mode (default: false)
#headless = false
# Disable browser security features (default: true)
#disable_security = true
# Extra arguments to pass to the browser
#extra_chromium_args = []
# Path to a Chrome executable, used to connect to your regular browser
# e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
#chrome_instance_path = ""
# Connect to a browser instance via WebSocket
#wss_url = ""
# Connect to a browser instance via CDP
#cdp_url = ""
# Optional proxy settings for the browser
# [browser.proxy]
# server = "http://proxy-server:port"
# username = "proxy-username"
# password = "proxy-password"