-
Notifications
You must be signed in to change notification settings - Fork 47
Expand file tree
/
Copy pathscreenshots.py
More file actions
214 lines (175 loc) · 7.91 KB
/
screenshots.py
File metadata and controls
214 lines (175 loc) · 7.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python3
"""Screenshot Capture Tools
This module provides page screenshot capabilities through the DevTools Protocol.
It enables capturing full page or region screenshots in various formats and saves
them to files that can be analyzed by vision-capable LLMs.
Key Features:
- Full page and viewport screenshot capture
- Region-based screenshot clipping
- Multiple output formats (PNG, JPEG, WebP)
- Quality compression control for JPEG/WebP
- Automatic file saving with timestamp-based naming
- Custom filename support
Example:
Capturing screenshots:
```python
# Capture current viewport as PNG
result = await take_screenshot()
# Capture full scrollable page as JPEG
result = await take_screenshot(format="jpeg", quality=80, full_page=True)
# Capture specific region
result = await take_screenshot(clip_x=100, clip_y=100, clip_width=400, clip_height=300)
# Custom filename
result = await take_screenshot(filename="my_screenshot")
```
Note:
All screenshot operations require an active connection to Chrome with the
Page domain enabled. Screenshots are saved to a temp directory and the file
path is returned for LLM analysis.
"""
from __future__ import annotations
import base64
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any
from mcp.server.fastmcp import FastMCP
from ..cdp_context import require_cdp_client
from .utils import create_error_response, create_success_response
# Directory (inside the system temp directory) that holds every saved screenshot.
SCREENSHOTS_DIR = Path(tempfile.gettempdir()).joinpath("chrome-devtools-screenshots")
def register_screenshot_tools(mcp: FastMCP) -> None:
    """Register screenshot capture tools with the MCP server.

    Args:
        mcp: FastMCP server instance to register tools with.

    Registered Tools:
        - take_screenshot: Capture page screenshots and save to file for LLM analysis
    """

    @mcp.tool()
    @require_cdp_client
    async def take_screenshot(
        format: str = "png",
        quality: int | None = None,
        full_page: bool = False,
        clip_x: float | None = None,
        clip_y: float | None = None,
        clip_width: float | None = None,
        clip_height: float | None = None,
        optimize_for_speed: bool = False,
        filename: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Capture a screenshot of the current page and save to file.

        Takes a screenshot of the current page state and saves it to a file in the
        system temp directory. Returns the file path so vision-capable LLMs can
        analyze the image. Supports multiple formats, quality control, full page
        capture, and region clipping.

        Args:
            format: Image format - "png", "jpeg", or "webp" (default: png).
                PNG is lossless, JPEG/WebP support quality compression.
            quality: Compression quality 0-100 for jpeg/webp formats.
                Higher values mean better quality but larger files.
                Rejected for PNG format. Default is browser default (~80).
            full_page: If True, captures the full scrollable page content
                beyond the visible viewport (default: False). When an explicit
                clip region is also given, the clip region is used as-is.
            clip_x: X coordinate (from top-left) for region capture.
                Must be provided with clip_y, clip_width, clip_height.
            clip_y: Y coordinate (from top-left) for region capture.
            clip_width: Width of the region to capture. Must be greater than 0.
            clip_height: Height of the region to capture. Must be greater than 0.
            optimize_for_speed: If True, optimizes for capture speed over
                file size (default: False).
            filename: Custom filename without extension. Must be a plain name
                (no directory components). If not provided, defaults to a
                timestamp-based name (screenshot_YYYYMMDD_HHMMSS_ffffff).

        Returns:
            Success response with file_path that can be read by vision LLMs,
            or error response if validation or capture fails.

        Example:
            # Basic viewport screenshot
            take_screenshot()

            # Full page with compression
            take_screenshot(format="jpeg", quality=70, full_page=True)

            # Capture specific region
            take_screenshot(clip_x=100, clip_y=200, clip_width=300, clip_height=150)

            # Custom filename
            take_screenshot(filename="login_page_error")
        """
        try:
            cdp_client = kwargs["cdp_client"]

            # Validate format
            valid_formats = ("png", "jpeg", "webp")
            format_lower = format.lower()
            if format_lower not in valid_formats:
                return create_error_response(
                    f"Invalid format '{format}'",
                    details=f"Supported formats: {', '.join(valid_formats)}",
                )

            # Validate quality parameter
            if quality is not None:
                if format_lower == "png":
                    return create_error_response(
                        "Quality parameter not supported for PNG format",
                        details="Use 'jpeg' or 'webp' format for quality control",
                    )
                if not (0 <= quality <= 100):
                    return create_error_response(
                        f"Invalid quality value: {quality}",
                        details="Quality must be between 0 and 100",
                    )

            # Validate the custom filename up front, before any capture work.
            # The name is joined onto SCREENSHOTS_DIR, so directory components,
            # "..", or an absolute path would let the write land outside it.
            if filename and (
                Path(filename).name != filename
                or "\\" in filename
                or filename in (".", "..")
            ):
                return create_error_response(
                    f"Invalid filename '{filename}'",
                    details="Filename must be a plain name without path separators",
                )

            # Build CDP command parameters
            params: dict[str, Any] = {"format": format_lower}

            # Quality is only reachable here for jpeg/webp (png was rejected above)
            if quality is not None:
                params["quality"] = quality

            # Add clip region: all four values or none
            clip_values = (clip_x, clip_y, clip_width, clip_height)
            if any(value is not None for value in clip_values):
                if any(value is None for value in clip_values):
                    return create_error_response(
                        "Incomplete clip region",
                        details="All clip parameters (clip_x, clip_y, clip_width, clip_height) "
                        "must be provided together",
                    )
                if clip_width <= 0 or clip_height <= 0:
                    return create_error_response(
                        "Invalid clip region",
                        details="clip_width and clip_height must be greater than 0",
                    )
                params["clip"] = {
                    "x": clip_x,
                    "y": clip_y,
                    "width": clip_width,
                    "height": clip_height,
                    "scale": 1,
                }

            # Add full page capture
            if full_page:
                params["captureBeyondViewport"] = True
                if "clip" not in params:
                    # captureBeyondViewport only *permits* capturing outside the
                    # viewport; the area still has to be named via a clip, so
                    # clip to the whole document as reported by the browser.
                    # NOTE(review): assumes send_command returns the CDP result
                    # object as a plain dict, as it does for captureScreenshot
                    # below - confirm against the CDP client implementation.
                    metrics = await cdp_client.send_command("Page.getLayoutMetrics", {})
                    content_size = None
                    if isinstance(metrics, dict):
                        # cssContentSize is in CSS pixels (what clip expects);
                        # contentSize is the older field kept as a fallback.
                        content_size = metrics.get("cssContentSize") or metrics.get(
                            "contentSize"
                        )
                    if isinstance(content_size, dict):
                        page_width = content_size.get("width")
                        page_height = content_size.get("height")
                        if page_width and page_height:
                            params["clip"] = {
                                "x": 0,
                                "y": 0,
                                "width": page_width,
                                "height": page_height,
                                "scale": 1,
                            }

            # Add speed optimization
            if optimize_for_speed:
                params["optimizeForSpeed"] = True

            # Capture screenshot via CDP
            result = await cdp_client.send_command("Page.captureScreenshot", params)

            # Decode base64 data
            screenshot_bytes = base64.b64decode(result["data"])

            # Create screenshots directory if it doesn't exist
            SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)

            # Generate filename
            if filename:
                file_name = f"{filename}.{format_lower}"
            else:
                # Microseconds keep two captures taken within the same second
                # from silently overwriting each other.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                file_name = f"screenshot_{timestamp}.{format_lower}"

            # Write to file
            file_path = SCREENSHOTS_DIR / file_name
            file_path.write_bytes(screenshot_bytes)

            return create_success_response(
                message=f"Screenshot saved to {file_path}",
                data={
                    "file_path": str(file_path),
                    "format": format_lower,
                    "size_bytes": len(screenshot_bytes),
                    "full_page": full_page,
                    "filename": file_name,
                },
            )
        except Exception as e:
            # Tool boundary: report every failure as an error response rather
            # than letting the exception propagate to the MCP server.
            return create_error_response(f"Screenshot capture failed: {e}")