Auto-screenshot on tool error

JE-Chen · JE-Chen · commit 97e707563f21 · 2026-04-25T19:54:04.000+08:00
When JE_AUTOCONTROL_MCP_ERROR_SHOTS is set to a directory, every
failed tools/call triggers a debug screenshot saved as
<tool>_<ts>.png under that path. The artifact path is appended to
both the error message returned to the model and the audit JSONL
record, giving a one-step forensic trail for flaky automations.
Disabled by default — costs nothing when the env var is unset.
diff --git a/je_auto_control/utils/mcp_server/audit.py b/je_auto_control/utils/mcp_server/audit.py
@@ -35,7 +35,8 @@ def enabled(self) -> bool:
 
     def record(self, *, tool: str, arguments: Dict[str, Any],
                status: str, duration_seconds: float,
-               error_text: Optional[str] = None) -> None:
+               error_text: Optional[str] = None,
+               artifact_path: Optional[str] = None) -> None:
         """Append one audit entry. No-ops when no path is configured."""
         if self._path is None:
             return
@@ -48,6 +49,8 @@ def record(self, *, tool: str, arguments: Dict[str, Any],
         }
         if error_text is not None:
             entry["error"] = error_text
+        if artifact_path is not None:
+            entry["artifact_path"] = artifact_path
         line = json.dumps(entry, ensure_ascii=False, default=str)
         with self._lock:
             with open(self._path, "a", encoding="utf-8") as handle:
diff --git a/je_auto_control/utils/mcp_server/server.py b/je_auto_control/utils/mcp_server/server.py
@@ -8,6 +8,7 @@
 """
 import itertools
 import json
+import os
 import sys
 import threading
 import time
@@ -479,14 +480,18 @@ def _handle_tools_call(self, msg_id: Any,
         except (OSError, RuntimeError, ValueError, TypeError,
                 AttributeError, KeyError, NotImplementedError) as error:
             autocontrol_logger.warning("MCP tool %s failed: %r", name, error)
+            artifact = _capture_error_screenshot(name)
             self._audit.record(
                 tool=name, arguments=arguments, status="error",
                 duration_seconds=time.monotonic() - started_at,
                 error_text=f"{type(error).__name__}: {error}",
+                artifact_path=artifact,
             )
+            error_text = f"{type(error).__name__}: {error}"
+            if artifact is not None:
+                error_text += f"\n(error screenshot saved to {artifact})"
             return {
-                "content": [{"type": "text",
-                             "text": f"{type(error).__name__}: {error}"}],
+                "content": [{"type": "text", "text": error_text}],
                 "isError": True,
             }
         finally:
@@ -581,6 +586,33 @@ def _stringify_result(value: Any) -> str:
         return repr(value)
 
 
+def _capture_error_screenshot(tool_name: str) -> Optional[str]:
+    """Save a debug screenshot when JE_AUTOCONTROL_MCP_ERROR_SHOTS is set."""
+    debug_dir = os.environ.get("JE_AUTOCONTROL_MCP_ERROR_SHOTS")
+    if not debug_dir:  # unset or empty env var: feature is off
+        return None
+    target_dir = os.path.realpath(os.fspath(debug_dir))
+    try:
+        os.makedirs(target_dir, exist_ok=True)
+    except OSError as error:
+        autocontrol_logger.info(
+            "MCP error-screenshot dir unavailable: %r", error,
+        )
+        return None  # unusable directory is logged, not raised
+    filename = f"{tool_name}_{int(time.time() * 1000)}.png"  # epoch ms
+    path = os.path.join(target_dir, filename)
+    try:  # import inside the try so ImportError is handled below
+        from je_auto_control.utils.cv2_utils.screenshot import pil_screenshot
+        pil_screenshot(file_path=path)
+    except (OSError, RuntimeError, ValueError, AttributeError,
+            ImportError) as error:
+        autocontrol_logger.info(
+            "MCP failed to capture error screenshot: %r", error,
+        )
+        return None  # capture failure is logged, not raised
+    return path  # path handed to pil_screenshot for the saved image
+
+
 def _file_uri_to_path(uri: str) -> Optional[str]:
     """Convert a ``file://`` URI to a local filesystem path; ``None`` otherwise."""
     if not isinstance(uri, str) or not uri.startswith("file://"):
diff --git a/test/unit_test/headless/test_mcp_server.py b/test/unit_test/headless/test_mcp_server.py
@@ -1404,6 +1404,75 @@ def test_launch_process_rejects_empty_argv():
         raise AssertionError("expected ValueError for empty argv")
 
 
+def test_auto_screenshot_on_error_skipped_when_env_unset(monkeypatch, tmp_path):
+    monkeypatch.delenv("JE_AUTOCONTROL_MCP_ERROR_SHOTS", raising=False)
+    from je_auto_control.utils.mcp_server.audit import AuditLogger
+    audit = AuditLogger(path=str(tmp_path / "audit.jsonl"))
+
+    def boom(x):  # handler that always raises, forcing the error path
+        raise RuntimeError("nope")
+
+    tool = MCPTool(
+        name="boom", description="boom",
+        input_schema={"type": "object", "properties": {
+            "x": {"type": "integer"}}, "required": ["x"]},
+        handler=boom,
+    )
+    server = MCPServer(tools=[tool], audit_logger=audit)
+    server.handle_line(_request("tools/call", params={
+        "name": "boom", "arguments": {"x": 1},
+    }))
+    record = json.loads(open(audit.path, encoding="utf-8").readline())
+    assert "artifact_path" not in record  # env var unset: no artifact logged
+
+
+def test_auto_screenshot_on_error_writes_file_when_env_set(
+        monkeypatch, tmp_path):
+    """When the env var is set we capture a screenshot via pil_screenshot."""
+    debug_dir = tmp_path / "shots"
+    monkeypatch.setenv("JE_AUTOCONTROL_MCP_ERROR_SHOTS", str(debug_dir))
+
+    saved_paths = []  # every file_path the fake screenshot is called with
+
+    def fake_screenshot(file_path=None, screen_region=None):
+        saved_paths.append(file_path)
+        # Touch the file so the audit record's path actually exists.
+        if file_path is not None:
+            open(file_path, "wb").close()
+
+        class _Stub:  # minimal return value; presumably mimics a PIL image
+            def save(self, *_args, **_kwargs):
+                return None
+
+            size = (1, 1)
+        return _Stub()
+
+    import je_auto_control.utils.cv2_utils.screenshot as screenshot_module
+    monkeypatch.setattr(screenshot_module, "pil_screenshot", fake_screenshot)
+
+    from je_auto_control.utils.mcp_server.audit import AuditLogger
+    audit = AuditLogger(path=str(tmp_path / "audit.jsonl"))
+
+    def boom(x):  # handler that always raises, forcing the error path
+        raise RuntimeError("nope")
+
+    tool = MCPTool(
+        name="boom2", description="boom2",
+        input_schema={"type": "object", "properties": {
+            "x": {"type": "integer"}}, "required": ["x"]},
+        handler=boom,
+    )
+    server = MCPServer(tools=[tool], audit_logger=audit)
+    response = _decode(server.handle_line(_request("tools/call", params={
+        "name": "boom2", "arguments": {"x": 1},
+    })))
+    assert response["result"]["isError"] is True
+    assert "error screenshot saved to" in response["result"]["content"][0]["text"]
+    record = json.loads(open(audit.path, encoding="utf-8").readline())
+    assert record["artifact_path"]  # audit entry carries a non-empty path
+    assert saved_paths  # the patched pil_screenshot was actually invoked
+
+
 def test_default_registry_lists_core_automation_tools():
     names = {tool.name for tool in build_default_tool_registry()}
     expected = {