Merge pull request #839 from fred913/main

fix(browser_use_tool): reimplement screenshot logics to get JPEG data
This commit is contained in:
Sheng Fan 2025-03-19 17:03:15 +08:00 committed by GitHub
commit f25ed7d49e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,4 +1,5 @@
import asyncio
+import base64
import json
from typing import Generic, Optional, TypeVar
@@ -552,7 +553,16 @@ Page content:
viewport_height = ctx.config.browser_window_size.get("height", 0)
# Take a screenshot for the state
-screenshot = await ctx.take_screenshot(full_page=True)
+page = await ctx.get_current_page()
+await page.bring_to_front()
+await page.wait_for_load_state()
+screenshot = await page.screenshot(
+full_page=True, animations="disabled", type="jpeg", quality=100
+)
+screenshot = base64.b64encode(screenshot).decode("utf-8")
# Build the state info with all required fields
state_info = {