Merge pull request #758 from cyzus/feat-fix-browser-use-click

fix browser click
This commit is contained in:
Isaac 2025-03-17 20:16:21 +08:00 committed by GitHub
commit 8659f324ba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -45,6 +45,7 @@ class BrowserUseTool(BaseTool):
"enum": [ "enum": [
"navigate", "navigate",
"click", "click",
"get_current_state",
"input_text", "input_text",
"screenshot", "screenshot",
"get_html", "get_html",
@ -64,7 +65,7 @@ class BrowserUseTool(BaseTool):
}, },
"index": { "index": {
"type": "integer", "type": "integer",
"description": "Element index for 'click' or 'input_text' actions", "description": "Element index (retrieved using get_current_state) for 'click' or 'input_text' actions",
}, },
"text": {"type": "string", "description": "Text for 'input_text' action"}, "text": {"type": "string", "description": "Text for 'input_text' action"},
"script": { "script": {
@ -201,6 +202,9 @@ class BrowserUseTool(BaseTool):
output += f" - Downloaded file to {download_path}" output += f" - Downloaded file to {download_path}"
return ToolResult(output=output) return ToolResult(output=output)
elif action == "get_current_state":
return await self.get_current_state(context)
elif action == "input_text": elif action == "input_text":
if index is None or not text: if index is None or not text:
return ToolResult( return ToolResult(
@ -287,21 +291,22 @@ class BrowserUseTool(BaseTool):
except Exception as e: except Exception as e:
return ToolResult(error=f"Browser action '{action}' failed: {str(e)}") return ToolResult(error=f"Browser action '{action}' failed: {str(e)}")
async def get_current_state(self) -> ToolResult: async def get_current_state(self, context: BrowserContext) -> ToolResult:
"""Get the current browser state as a ToolResult.""" """Get the current browser state as a ToolResult."""
async with self.lock: try:
try: state = await context.get_state()
context = await self._ensure_browser_initialized() state_info = {
state = await context.get_state() "url": state.url,
state_info = { "title": state.title,
"url": state.url, "tabs": [tab.model_dump() for tab in state.tabs],
"title": state.title, "help": "[0], [1], [2], etc., represent clickable indices corresponding to the elements listed. Clicking on these indices will navigate to or interact with the respective content behind them.",
"tabs": [tab.model_dump() for tab in state.tabs], "interactive_elements": state.element_tree.clickable_elements_to_string(),
"interactive_elements": state.element_tree.clickable_elements_to_string(), }
} return ToolResult(
return ToolResult(output=json.dumps(state_info)) output=json.dumps(state_info, indent=4, ensure_ascii=False)
except Exception as e: )
return ToolResult(error=f"Failed to get browser state: {str(e)}") except Exception as e:
return ToolResult(error=f"Failed to get browser state: {str(e)}")
async def cleanup(self): async def cleanup(self):
"""Clean up browser resources.""" """Clean up browser resources."""