Skip to content

Commit b50be3b

Browse files
committed
Add LLM action planner with Anthropic backend
plan_actions() turns a natural-language description into a validated AC_* action list by asking an LLM (Anthropic Claude by default) to emit JSON constrained to the executor's known commands. Output is parsed leniently (strips code fences, extracts the first JSON array from prose) and then validated by the same schema the executor uses, so callers can pipe the result straight into execute_action. Backend selection mirrors utils/vision: an LLMBackend protocol with an Anthropic implementation and a null fallback that fails fast when no key or SDK is present. AC_llm_plan / AC_llm_run executor commands expose the flow to JSON action files, the socket server, and the MCP bridge.
1 parent 5f28c3d commit b50be3b

9 files changed

Lines changed: 539 additions & 0 deletions

File tree

je_auto_control/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
locate_text_center, read_text_in_region, set_tesseract_cmd,
6464
wait_for_text,
6565
)
66+
# LLM action planner (headless)
67+
from je_auto_control.utils.llm import (
68+
LLMBackend, LLMNotAvailableError, LLMPlanError,
69+
plan_actions, run_from_description,
70+
)
6671
# MCP server (headless stdio bridge for Claude / other MCP clients)
6772
from je_auto_control.utils.mcp_server import (
6873
AuditLogger, HttpMCPServer, MCPContent, MCPPrompt, MCPPromptArgument,
@@ -250,6 +255,9 @@ def start_autocontrol_gui(*args, **kwargs):
250255
"click_accessibility_element",
251256
# VLM locator
252257
"VLMNotAvailableError", "locate_by_description", "click_by_description",
258+
# LLM action planner
259+
"LLMBackend", "LLMNotAvailableError", "LLMPlanError",
260+
"plan_actions", "run_from_description",
253261
"generate_html", "generate_html_report", "generate_json", "generate_json_report", "generate_xml",
254262
"generate_xml_report", "get_dir_files_as_list", "create_project_dir", "start_autocontrol_socket_server",
255263
"callback_executor", "package_manager", "ShellManager", "default_shell_manager",

je_auto_control/utils/executor/action_executor.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
BLOCK_COMMANDS, LoopBreak, LoopContinue,
2424
)
2525
from je_auto_control.utils.executor.mouse_aliases import MOUSE_BUTTON_COMMANDS
26+
from je_auto_control.utils.llm.planner import (
27+
plan_actions as llm_plan_actions,
28+
run_from_description as llm_run_from_description,
29+
)
2630
from je_auto_control.utils.ocr.ocr_engine import (
2731
click_text as ocr_click_text,
2832
find_text_regex as ocr_find_text_regex,
@@ -97,6 +101,34 @@ def _vlm_locate_as_list(description: str,
97101
return None if coords is None else [coords[0], coords[1]]
98102

99103

104+
def _llm_plan_for_executor(description: str,
                           examples: Optional[list] = None,
                           model: Optional[str] = None,
                           max_tokens: int = 2048) -> list:
    """Executor adapter: plan without executing, using current command set.

    Thin wrapper so ``AC_llm_plan`` can be registered in the executor's
    command table; the live command set is re-queried on every call.
    """
    planner_kwargs = {
        "known_commands": executor.known_commands(),
        "examples": examples,
        "model": model,
        "max_tokens": int(max_tokens),
    }
    return llm_plan_actions(description, **planner_kwargs)
116+
117+
118+
def _llm_run_for_executor(description: str,
                          examples: Optional[list] = None,
                          model: Optional[str] = None,
                          max_tokens: int = 2048) -> Dict[str, Any]:
    """Executor adapter: plan and execute against the global executor.

    Thin wrapper so ``AC_llm_run`` can be registered in the executor's
    command table; the module-level ``executor`` instance is bound here.
    """
    planner_kwargs = {
        "executor": executor,
        "examples": examples,
        "model": model,
        "max_tokens": int(max_tokens),
    }
    return llm_run_from_description(description, **planner_kwargs)
130+
131+
100132
def _ocr_read_region_as_dicts(region: Optional[List[int]] = None,
101133
lang: str = "eng",
102134
min_confidence: float = 60.0) -> List[dict]:
@@ -260,6 +292,10 @@ def __init__(self):
260292
# MCP server (Model Context Protocol stdio bridge)
261293
"AC_start_mcp_server": start_mcp_stdio_server,
262294
"AC_start_mcp_http_server": start_mcp_http_server,
295+
296+
# LLM action planner
297+
"AC_llm_plan": _llm_plan_for_executor,
298+
"AC_llm_run": _llm_run_for_executor,
263299
}
264300

265301
def known_commands(self) -> set:
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""LLM-driven natural-language → action-list planning.
2+
3+
The planner asks an LLM (default: Anthropic Claude) to translate a
4+
description like ``"open Notepad, type hello, save as test.txt"`` into a
5+
validated JSON action list using the executor's known ``AC_*`` commands.
6+
The result is structurally validated before it is returned, so callers can
7+
feed it straight into the executor.
8+
"""
9+
from je_auto_control.utils.llm.backends import (
10+
LLMBackend, LLMNotAvailableError, get_backend, reset_backend_cache,
11+
)
12+
from je_auto_control.utils.llm.planner import (
13+
LLMPlanError, plan_actions, run_from_description,
14+
)
15+
16+
# Public names re-exported at the je_auto_control.utils.llm package level.
__all__ = [
    "LLMBackend", "LLMNotAvailableError", "LLMPlanError",
    "get_backend", "reset_backend_cache",
    "plan_actions", "run_from_description",
]
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""LLM backend factory.
2+
3+
Mirrors :mod:`je_auto_control.utils.vision.backends`: backends declare
4+
``available`` and ``complete()``; the factory picks the first ready
5+
candidate based on env vars and an optional preference. A null backend is
6+
returned when nothing is configured so callers can detect the situation
7+
through :class:`LLMNotAvailableError` rather than ``ImportError``.
8+
"""
9+
import os
10+
from typing import Optional
11+
12+
from je_auto_control.utils.llm.backends.base import (
13+
LLMBackend, LLMNotAvailableError,
14+
)
15+
from je_auto_control.utils.llm.backends.null_backend import NullLLMBackend
16+
17+
# Module-level singleton: memoized backend instance shared by get_backend();
# cleared via reset_backend_cache().
_cached_backend: Optional[LLMBackend] = None
18+
19+
20+
def get_backend() -> LLMBackend:
    """Return (and cache) an LLM backend chosen by env vars."""
    global _cached_backend
    # Build lazily on first use; subsequent calls reuse the same instance
    # until reset_backend_cache() clears it.
    if _cached_backend is None:
        _cached_backend = _build_backend()
    return _cached_backend
27+
28+
29+
def reset_backend_cache() -> None:
    """Drop the cached backend so the next ``get_backend()`` re-detects."""
    global _cached_backend
    _cached_backend = None
33+
34+
35+
def _build_backend() -> LLMBackend:
    """Probe candidate backends and return the first one that is ready."""
    requested = os.environ.get("AUTOCONTROL_LLM_BACKEND", "").lower()
    for candidate_name in _preference_order(requested):
        candidate = _try_build(candidate_name)
        if candidate is not None and candidate.available:
            return candidate
    # Nothing usable was found: hand back a null backend whose complete()
    # raises LLMNotAvailableError carrying this actionable message.
    return NullLLMBackend(
        "no LLM backend ready; set ANTHROPIC_API_KEY and install the "
        "matching SDK (anthropic)",
    )
45+
46+
47+
def _preference_order(preferred: str):
48+
if preferred == "anthropic":
49+
return ("anthropic",)
50+
if os.environ.get("ANTHROPIC_API_KEY"):
51+
return ("anthropic",)
52+
return ("anthropic",)
53+
54+
55+
def _try_build(name: str) -> Optional[LLMBackend]:
    """Instantiate the backend called ``name``; ``None`` for unknown names."""
    if name != "anthropic":
        return None
    # Imported lazily so the SDK module is only touched when selected.
    from je_auto_control.utils.llm.backends.anthropic_backend import (
        AnthropicLLMBackend,
    )
    return AnthropicLLMBackend()
62+
63+
64+
__all__ = [
65+
"LLMBackend", "LLMNotAvailableError", "NullLLMBackend",
66+
"get_backend", "reset_backend_cache",
67+
]
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Anthropic (Claude) text-completion backend for the action planner."""
2+
import os
3+
from typing import Optional
4+
5+
from je_auto_control.utils.llm.backends.base import LLMBackend
6+
from je_auto_control.utils.logging.logging_instance import autocontrol_logger
7+
8+
# Default model used when neither the call nor AUTOCONTROL_LLM_MODEL picks one.
# NOTE(review): "claude-opus-4-7" does not match Anthropic's published model-id
# naming (e.g. "claude-opus-4-1") — confirm this id exists or requests will
# fail with a model-not-found error.
_DEFAULT_MODEL = "claude-opus-4-7"
# Per-request timeout forwarded to the SDK, in seconds.
_REQUEST_TIMEOUT_S = 60.0
10+
11+
12+
class AnthropicLLMBackend(LLMBackend):
    """Call ``claude-*`` chat models via the ``anthropic`` Python SDK.

    ``available`` is decided eagerly in ``__init__``: the ``anthropic``
    package must import, ``ANTHROPIC_API_KEY`` must be set, and the client
    must construct without error.  ``complete()`` never raises; any failure
    is logged and an empty string is returned, per the base-class contract.
    """

    name = "anthropic"

    def __init__(self) -> None:
        self._client = None
        try:
            import anthropic  # noqa: F401
        except ImportError:
            # SDK not installed: report unavailable instead of raising.
            self.available = False
            return
        if not os.environ.get("ANTHROPIC_API_KEY"):
            # The SDK would only fail at request time without a key; fail early.
            self.available = False
            return
        try:
            from anthropic import Anthropic
            self._client = Anthropic()
            self.available = True
        except Exception as error:
            # BUGFIX: the SDK's own error types (anthropic.AnthropicError and
            # subclasses) derive from Exception, not the previous narrow
            # (ImportError, ValueError, RuntimeError) tuple, so init failures
            # could escape and crash backend detection.  This is a boundary:
            # log and mark unavailable.
            autocontrol_logger.warning(
                "Anthropic LLM client init failed: %r", error,
            )
            self.available = False

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        """Return the model's text for ``prompt``, or ``""`` on any failure.

        Model precedence: explicit ``model`` argument, then the
        ``AUTOCONTROL_LLM_MODEL`` env var, then ``_DEFAULT_MODEL``.
        ``system`` is forwarded as the system prompt when given.
        """
        if not self.available or self._client is None:
            return ""
        chosen_model = (model
                        or os.environ.get("AUTOCONTROL_LLM_MODEL")
                        or _DEFAULT_MODEL)
        kwargs = {
            "model": chosen_model,
            "max_tokens": int(max_tokens),
            "timeout": _REQUEST_TIMEOUT_S,
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system
        try:
            response = self._client.messages.create(**kwargs)
        except Exception as error:
            # BUGFIX: anthropic's APIError / APIConnectionError hierarchy
            # derives directly from Exception, not from (OSError, ValueError,
            # RuntimeError), so API and rate-limit failures escaped the old
            # narrow tuple — violating the LLMBackend contract that transient
            # failures return "" rather than raise.
            autocontrol_logger.warning(
                "Anthropic LLM request failed: %r", error,
            )
            return ""
        return _join_text_blocks(response)
62+
63+
64+
def _join_text_blocks(response) -> str:
65+
"""Concatenate every text block in an Anthropic response."""
66+
parts = []
67+
for block in getattr(response, "content", []) or []:
68+
if getattr(block, "type", None) == "text":
69+
text = getattr(block, "text", "") or ""
70+
if text:
71+
parts.append(text)
72+
return "".join(parts)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Common protocol shared by every LLM backend."""
2+
from typing import Optional
3+
4+
5+
class LLMNotAvailableError(RuntimeError):
    """Raised when no LLM backend is configured / reachable.

    Subclasses RuntimeError, so existing broad error handling still catches
    it; callers may catch this type specifically to prompt for configuration
    (API key / SDK install).
    """
7+
8+
9+
class LLMBackend:
    """Minimal text-completion contract used by the action planner.

    Concrete backends set ``name`` to their identifier, flip ``available``
    to True once credentials and SDK are in place, and implement
    :meth:`complete`.
    """

    name: str = "base"        # backend identifier; subclasses override
    available: bool = False   # True only when the backend can serve requests

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        """Return the model's text response for ``prompt``.

        Backends should return an empty string (not raise) on transient
        failures so the planner can surface a deterministic error.
        """
        raise NotImplementedError
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Fallback LLM backend used when nothing real is configured."""
2+
from typing import Optional
3+
4+
from je_auto_control.utils.llm.backends.base import (
5+
LLMBackend, LLMNotAvailableError,
6+
)
7+
8+
9+
class NullLLMBackend(LLMBackend):
    """Always raises so callers fail fast with a clear message."""

    name = "null"
    available = False

    def __init__(self, reason: str) -> None:
        # Explanation surfaced to the caller whenever complete() is invoked.
        self._reason = reason

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        # Every argument is deliberately ignored: this backend can never
        # answer, so it raises with the configured reason instead.
        del prompt, system, model, max_tokens
        raise LLMNotAvailableError(self._reason)

0 commit comments

Comments
 (0)