chrome-devtools-mcp/src/tools/screenshots.py at 8d8bfdaf2a1a7802a3e8fe4054d582bceb3f81a7 · benjaminr/chrome-devtools-mcp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python3
"""Screenshot Capture Tools

This module provides page screenshot capabilities through the DevTools Protocol.
It enables capturing full page or region screenshots in various formats and saves
them to files that can be analyzed by vision-capable LLMs.

Key Features:
    - Full page and viewport screenshot capture
    - Region-based screenshot clipping
    - Multiple output formats (PNG, JPEG, WebP)
    - Quality compression control for JPEG/WebP
    - Automatic file saving with timestamp-based naming
    - Custom filename support

Example:
    Capturing screenshots:

    ```python
    # Capture current viewport as PNG
    result = await take_screenshot()

    # Capture full scrollable page as JPEG
    result = await take_screenshot(format="jpeg", quality=80, full_page=True)

    # Capture specific region
    result = await take_screenshot(clip_x=100, clip_y=100, clip_width=400, clip_height=300)

    # Custom filename
    result = await take_screenshot(filename="my_screenshot")
    ```

Note:
    All screenshot operations require an active connection to Chrome with the
    Page domain enabled. Screenshots are saved to a temp directory and the file
    path is returned for LLM analysis.
"""

from __future__ import annotations

import base64
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any

from mcp.server.fastmcp import FastMCP

from ..cdp_context import require_cdp_client
from .utils import create_error_response, create_success_response

# Directory under the system temp dir where captured screenshots are written.
SCREENSHOTS_DIR = Path(tempfile.gettempdir()).joinpath("chrome-devtools-screenshots")


def _is_plain_file_name(name: str) -> bool:
    """Return True when *name* is a bare file name with no directory component.

    Both separator styles are rejected regardless of the host platform so that
    a name cannot escape the screenshots directory via ``../`` segments, an
    absolute path, or a drive prefix.
    """
    if "/" in name or "\\" in name:
        return False
    # Path.name strips any anchor/drive; a mismatch means one was present
    # (this also rejects "." and the empty string).
    return Path(name).name == name


def register_screenshot_tools(mcp: FastMCP) -> None:
    """Register screenshot capture tools with the MCP server.

    Args:
        mcp: FastMCP server instance to register tools with.

    Registered Tools:
        - take_screenshot: Capture page screenshots and save to file for LLM analysis
    """

    @mcp.tool()
    @require_cdp_client
    async def take_screenshot(
        format: str = "png",
        quality: int | None = None,
        full_page: bool = False,
        clip_x: float | None = None,
        clip_y: float | None = None,
        clip_width: float | None = None,
        clip_height: float | None = None,
        optimize_for_speed: bool = False,
        filename: str | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Capture a screenshot of the current page and save to file.

        Takes a screenshot of the current page state and saves it to a file in the
        system temp directory. Returns the file path so vision-capable LLMs can
        analyze the image. Supports multiple formats, quality control, full page
        capture, and region clipping.

        Args:
            format: Image format - "png", "jpeg", or "webp" (default: png).
                   PNG is lossless, JPEG/WebP support quality compression.
            quality: Compression quality 0-100 for jpeg/webp formats.
                    Higher values mean better quality but larger files.
                    Not supported for PNG format. Default is browser default (~80).
            full_page: If True, captures the full scrollable page content
                      beyond the visible viewport (default: False).
                      An explicit clip region takes precedence.
            clip_x: X coordinate (from top-left) for region capture.
                   Must be provided with clip_y, clip_width, clip_height.
            clip_y: Y coordinate (from top-left) for region capture.
            clip_width: Width of the region to capture.
            clip_height: Height of the region to capture.
            optimize_for_speed: If True, optimizes for capture speed over
                               file size (default: False).
            filename: Custom filename without extension. Must be a plain name
                     with no directory components. If not provided,
                     defaults to timestamp-based name (screenshot_YYYYMMDD_HHMMSS).

        Returns:
            Success response with file_path that can be read by vision LLMs,
            or error response if validation or capture fails.

        Example:
            # Basic viewport screenshot
            take_screenshot()

            # Full page with compression
            take_screenshot(format="jpeg", quality=70, full_page=True)

            # Capture specific region
            take_screenshot(clip_x=100, clip_y=200, clip_width=300, clip_height=150)

            # Custom filename
            take_screenshot(filename="login_page_error")
        """
        try:
            # Injected by the require_cdp_client decorator.
            cdp_client = kwargs["cdp_client"]

            # Validate format
            valid_formats = ("png", "jpeg", "webp")
            format_lower = format.lower()
            if format_lower not in valid_formats:
                return create_error_response(
                    f"Invalid format '{format}'",
                    details=f"Supported formats: {', '.join(valid_formats)}",
                )

            # Validate quality parameter
            if quality is not None:
                if format_lower == "png":
                    return create_error_response(
                        "Quality parameter not supported for PNG format",
                        details="Use 'jpeg' or 'webp' format for quality control",
                    )
                if not (0 <= quality <= 100):
                    return create_error_response(
                        f"Invalid quality value: {quality}",
                        details="Quality must be between 0 and 100",
                    )

            # Validate the custom filename before doing any capture work. The
            # value is an untrusted tool argument and is joined onto
            # SCREENSHOTS_DIR below, so it must not carry directory components.
            if filename and not _is_plain_file_name(f"{filename}.{format_lower}"):
                return create_error_response(
                    f"Invalid filename '{filename}'",
                    details="Filename must be a plain name without path separators "
                    "or directory components",
                )

            # Build CDP command parameters
            params: dict[str, Any] = {
                "format": format_lower,
            }

            # Quality was already rejected for PNG above, so any value here
            # belongs to a JPEG/WebP request.
            if quality is not None:
                params["quality"] = quality

            # Add full page capture
            if full_page:
                params["captureBeyondViewport"] = True

            # Add clip region if all coordinates provided
            clip_params = (clip_x, clip_y, clip_width, clip_height)
            if any(p is not None for p in clip_params):
                if any(p is None for p in clip_params):
                    return create_error_response(
                        "Incomplete clip region",
                        details="All clip parameters (clip_x, clip_y, clip_width, clip_height) "
                        "must be provided together",
                    )
                params["clip"] = {
                    "x": clip_x,
                    "y": clip_y,
                    "width": clip_width,
                    "height": clip_height,
                    "scale": 1,
                }
            elif full_page:
                # captureBeyondViewport only permits rendering outside the
                # viewport; the region to render must still be given as a clip.
                # Use the document's content size so the whole scrollable page
                # is captured.
                # NOTE(review): assumes send_command returns the CDP result
                # object as a dict (as it does for captureScreenshot below).
                metrics = await cdp_client.send_command("Page.getLayoutMetrics", {})
                content_size = None
                if isinstance(metrics, dict):
                    # cssContentSize is the current field; contentSize is the
                    # deprecated equivalent kept for older Chrome versions.
                    content_size = metrics.get("cssContentSize") or metrics.get("contentSize")
                if content_size:
                    params["clip"] = {
                        "x": 0,
                        "y": 0,
                        "width": content_size["width"],
                        "height": content_size["height"],
                        "scale": 1,
                    }

            # Add speed optimization
            if optimize_for_speed:
                params["optimizeForSpeed"] = True

            # Capture screenshot via CDP
            result = await cdp_client.send_command("Page.captureScreenshot", params)

            # Decode base64 data
            screenshot_bytes = base64.b64decode(result["data"])

            # Create screenshots directory if it doesn't exist
            SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)

            # Generate filename
            if filename:
                file_name = f"{filename}.{format_lower}"
            else:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                file_name = f"screenshot_{timestamp}.{format_lower}"

            # Write to file
            file_path = SCREENSHOTS_DIR / file_name
            file_path.write_bytes(screenshot_bytes)

            return create_success_response(
                message=f"Screenshot saved to {file_path}",
                data={
                    "file_path": str(file_path),
                    "format": format_lower,
                    "size_bytes": len(screenshot_bytes),
                    "full_page": full_page,
                    "filename": file_name,
                },
            )

        except Exception as e:
            # Tool boundary: report any failure to the MCP caller as an error
            # response instead of raising.
            return create_error_response(f"Screenshot capture failed: {e}")