Skip to content

Commit b50be3b

Browse files
committed
Add LLM action planner with Anthropic backend
plan_actions() turns a natural-language description into a validated AC_* action list by asking an LLM (Anthropic Claude by default) to emit JSON constrained to the executor's known commands. Output is parsed leniently (strips code fences, extracts the first JSON array from prose) and then validated by the same schema the executor uses, so callers can pipe the result straight into execute_action. Backend selection mirrors utils/vision: an LLMBackend protocol with an Anthropic implementation and a null fallback that fails fast when no key or SDK is present. AC_llm_plan / AC_llm_run executor commands expose the flow to JSON action files, the socket server, and the MCP bridge.
1 parent 5f28c3d commit b50be3b

9 files changed

Lines changed: 539 additions & 0 deletions

File tree

je_auto_control/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
locate_text_center, read_text_in_region, set_tesseract_cmd,
6464
wait_for_text,
6565
)
66+
# LLM action planner (headless)
67+
from je_auto_control.utils.llm import (
68+
LLMBackend, LLMNotAvailableError, LLMPlanError,
69+
plan_actions, run_from_description,
70+
)
6671
# MCP server (headless stdio bridge for Claude / other MCP clients)
6772
from je_auto_control.utils.mcp_server import (
6873
AuditLogger, HttpMCPServer, MCPContent, MCPPrompt, MCPPromptArgument,
@@ -250,6 +255,9 @@ def start_autocontrol_gui(*args, **kwargs):
250255
"click_accessibility_element",
251256
# VLM locator
252257
"VLMNotAvailableError", "locate_by_description", "click_by_description",
258+
# LLM action planner
259+
"LLMBackend", "LLMNotAvailableError", "LLMPlanError",
260+
"plan_actions", "run_from_description",
253261
"generate_html", "generate_html_report", "generate_json", "generate_json_report", "generate_xml",
254262
"generate_xml_report", "get_dir_files_as_list", "create_project_dir", "start_autocontrol_socket_server",
255263
"callback_executor", "package_manager", "ShellManager", "default_shell_manager",

je_auto_control/utils/executor/action_executor.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
BLOCK_COMMANDS, LoopBreak, LoopContinue,
2424
)
2525
from je_auto_control.utils.executor.mouse_aliases import MOUSE_BUTTON_COMMANDS
26+
from je_auto_control.utils.llm.planner import (
27+
plan_actions as llm_plan_actions,
28+
run_from_description as llm_run_from_description,
29+
)
2630
from je_auto_control.utils.ocr.ocr_engine import (
2731
click_text as ocr_click_text,
2832
find_text_regex as ocr_find_text_regex,
@@ -97,6 +101,34 @@ def _vlm_locate_as_list(description: str,
97101
return None if coords is None else [coords[0], coords[1]]
98102

99103

104+
def _llm_plan_for_executor(description: str,
                           examples: Optional[list] = None,
                           model: Optional[str] = None,
                           max_tokens: int = 2048) -> list:
    """Executor adapter: plan without executing, using current command set.

    Thin wrapper so ``AC_llm_plan`` can be registered in the executor's
    command table; the live command set is re-queried on every call.
    """
    planner_kwargs = {
        "known_commands": executor.known_commands(),
        "examples": examples,
        "model": model,
        "max_tokens": int(max_tokens),
    }
    return llm_plan_actions(description, **planner_kwargs)
116+
117+
118+
def _llm_run_for_executor(description: str,
                          examples: Optional[list] = None,
                          model: Optional[str] = None,
                          max_tokens: int = 2048) -> Dict[str, Any]:
    """Executor adapter: plan and execute against the global executor.

    Thin wrapper so ``AC_llm_run`` can be registered in the executor's
    command table; the module-level ``executor`` instance is bound here.
    """
    planner_kwargs = {
        "executor": executor,
        "examples": examples,
        "model": model,
        "max_tokens": int(max_tokens),
    }
    return llm_run_from_description(description, **planner_kwargs)
130+
131+
100132
def _ocr_read_region_as_dicts(region: Optional[List[int]] = None,
101133
lang: str = "eng",
102134
min_confidence: float = 60.0) -> List[dict]:
@@ -260,6 +292,10 @@ def __init__(self):
260292
# MCP server (Model Context Protocol stdio bridge)
261293
"AC_start_mcp_server": start_mcp_stdio_server,
262294
"AC_start_mcp_http_server": start_mcp_http_server,
295+
296+
# LLM action planner
297+
"AC_llm_plan": _llm_plan_for_executor,
298+
"AC_llm_run": _llm_run_for_executor,
263299
}
264300

265301
def known_commands(self) -> set:
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""LLM-driven natural-language → action-list planning.
2+
3+
The planner asks an LLM (default: Anthropic Claude) to translate a
4+
description like ``"open Notepad, type hello, save as test.txt"`` into a
5+
validated JSON action list using the executor's known ``AC_*`` commands.
6+
The result is structurally validated before it is returned, so callers can
7+
feed it straight into the executor.
8+
"""
9+
from je_auto_control.utils.llm.backends import (
10+
LLMBackend, LLMNotAvailableError, get_backend, reset_backend_cache,
11+
)
12+
from je_auto_control.utils.llm.planner import (
13+
LLMPlanError, plan_actions, run_from_description,
14+
)
15+
16+
# Public names re-exported at the je_auto_control.utils.llm package level.
__all__ = [
    "LLMBackend", "LLMNotAvailableError", "LLMPlanError",
    "get_backend", "reset_backend_cache",
    "plan_actions", "run_from_description",
]
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""LLM backend factory.
2+
3+
Mirrors :mod:`je_auto_control.utils.vision.backends`: backends declare
4+
``available`` and ``complete()``; the factory picks the first ready
5+
candidate based on env vars and an optional preference. A null backend is
6+
returned when nothing is configured so callers can detect the situation
7+
through :class:`LLMNotAvailableError` rather than ``ImportError``.
8+
"""
9+
import os
10+
from typing import Optional
11+
12+
from je_auto_control.utils.llm.backends.base import (
13+
LLMBackend, LLMNotAvailableError,
14+
)
15+
from je_auto_control.utils.llm.backends.null_backend import NullLLMBackend
16+
17+
# Module-level singleton: memoized backend instance shared by get_backend();
# cleared via reset_backend_cache().
_cached_backend: Optional[LLMBackend] = None
18+
19+
20+
def get_backend() -> LLMBackend:
    """Return (and cache) an LLM backend chosen by env vars."""
    global _cached_backend
    # Build lazily on first use; subsequent calls reuse the same instance
    # until reset_backend_cache() clears it.
    if _cached_backend is None:
        _cached_backend = _build_backend()
    return _cached_backend
27+
28+
29+
def reset_backend_cache() -> None:
    """Drop the cached backend so the next ``get_backend()`` re-detects."""
    global _cached_backend
    _cached_backend = None
33+
34+
35+
def _build_backend() -> LLMBackend:
    """Probe candidate backends and return the first one that is ready."""
    requested = os.environ.get("AUTOCONTROL_LLM_BACKEND", "").lower()
    for candidate_name in _preference_order(requested):
        candidate = _try_build(candidate_name)
        if candidate is not None and candidate.available:
            return candidate
    # Nothing usable was found: hand back a null backend whose complete()
    # raises LLMNotAvailableError carrying this actionable message.
    return NullLLMBackend(
        "no LLM backend ready; set ANTHROPIC_API_KEY and install the "
        "matching SDK (anthropic)",
    )
45+
46+
47+
def _preference_order(preferred: str):
48+
if preferred == "anthropic":
49+
return ("anthropic",)
50+
if os.environ.get("ANTHROPIC_API_KEY"):
51+
return ("anthropic",)
52+
return ("anthropic",)
53+
54+
55+
def _try_build(name: str) -> Optional[LLMBackend]:
    """Instantiate the backend called ``name``; ``None`` for unknown names."""
    if name != "anthropic":
        return None
    # Imported lazily so the SDK module is only touched when selected.
    from je_auto_control.utils.llm.backends.anthropic_backend import (
        AnthropicLLMBackend,
    )
    return AnthropicLLMBackend()
62+
63+
64+
__all__ = [
65+
"LLMBackend", "LLMNotAvailableError", "NullLLMBackend",
66+
"get_backend", "reset_backend_cache",
67+
]
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Anthropic (Claude) text-completion backend for the action planner."""
2+
import os
3+
from typing import Optional
4+
5+
from je_auto_control.utils.llm.backends.base import LLMBackend
6+
from je_auto_control.utils.logging.logging_instance import autocontrol_logger
7+
8+
# Default model used when neither the call nor AUTOCONTROL_LLM_MODEL picks one.
# NOTE(review): "claude-opus-4-7" does not match Anthropic's published model-id
# naming (e.g. "claude-opus-4-1") — confirm this id exists or requests will
# fail with a model-not-found error.
_DEFAULT_MODEL = "claude-opus-4-7"
# Per-request timeout forwarded to the SDK, in seconds.
_REQUEST_TIMEOUT_S = 60.0
10+
11+
12+
class AnthropicLLMBackend(LLMBackend):
    """Call ``claude-*`` chat models via the ``anthropic`` Python SDK.

    ``available`` is decided eagerly in ``__init__``: the ``anthropic``
    package must import, ``ANTHROPIC_API_KEY`` must be set, and the client
    must construct without error.  ``complete()`` never raises; any failure
    is logged and an empty string is returned, per the base-class contract.
    """

    name = "anthropic"

    def __init__(self) -> None:
        self._client = None
        try:
            import anthropic  # noqa: F401
        except ImportError:
            # SDK not installed: report unavailable instead of raising.
            self.available = False
            return
        if not os.environ.get("ANTHROPIC_API_KEY"):
            # The SDK would only fail at request time without a key; fail early.
            self.available = False
            return
        try:
            from anthropic import Anthropic
            self._client = Anthropic()
            self.available = True
        except Exception as error:
            # BUGFIX: the SDK's own error types (anthropic.AnthropicError and
            # subclasses) derive from Exception, not the previous narrow
            # (ImportError, ValueError, RuntimeError) tuple, so init failures
            # could escape and crash backend detection.  This is a boundary:
            # log and mark unavailable.
            autocontrol_logger.warning(
                "Anthropic LLM client init failed: %r", error,
            )
            self.available = False

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        """Return the model's text for ``prompt``, or ``""`` on any failure.

        Model precedence: explicit ``model`` argument, then the
        ``AUTOCONTROL_LLM_MODEL`` env var, then ``_DEFAULT_MODEL``.
        ``system`` is forwarded as the system prompt when given.
        """
        if not self.available or self._client is None:
            return ""
        chosen_model = (model
                        or os.environ.get("AUTOCONTROL_LLM_MODEL")
                        or _DEFAULT_MODEL)
        kwargs = {
            "model": chosen_model,
            "max_tokens": int(max_tokens),
            "timeout": _REQUEST_TIMEOUT_S,
            "messages": [{"role": "user", "content": prompt}],
        }
        if system:
            kwargs["system"] = system
        try:
            response = self._client.messages.create(**kwargs)
        except Exception as error:
            # BUGFIX: anthropic's APIError / APIConnectionError hierarchy
            # derives directly from Exception, not from (OSError, ValueError,
            # RuntimeError), so API and rate-limit failures escaped the old
            # narrow tuple — violating the LLMBackend contract that transient
            # failures return "" rather than raise.
            autocontrol_logger.warning(
                "Anthropic LLM request failed: %r", error,
            )
            return ""
        return _join_text_blocks(response)
62+
63+
64+
def _join_text_blocks(response) -> str:
65+
"""Concatenate every text block in an Anthropic response."""
66+
parts = []
67+
for block in getattr(response, "content", []) or []:
68+
if getattr(block, "type", None) == "text":
69+
text = getattr(block, "text", "") or ""
70+
if text:
71+
parts.append(text)
72+
return "".join(parts)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Common protocol shared by every LLM backend."""
2+
from typing import Optional
3+
4+
5+
class LLMNotAvailableError(RuntimeError):
    """Raised when no LLM backend is configured / reachable.

    Subclasses RuntimeError, so existing broad error handling still catches
    it; callers may catch this type specifically to prompt for configuration
    (API key / SDK install).
    """
7+
8+
9+
class LLMBackend:
    """Minimal text-completion contract used by the action planner.

    Concrete backends set ``name`` to their identifier, flip ``available``
    to True once credentials and SDK are in place, and implement
    :meth:`complete`.
    """

    name: str = "base"        # backend identifier; subclasses override
    available: bool = False   # True only when the backend can serve requests

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        """Return the model's text response for ``prompt``.

        Backends should return an empty string (not raise) on transient
        failures so the planner can surface a deterministic error.
        """
        raise NotImplementedError
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Fallback LLM backend used when nothing real is configured."""
2+
from typing import Optional
3+
4+
from je_auto_control.utils.llm.backends.base import (
5+
LLMBackend, LLMNotAvailableError,
6+
)
7+
8+
9+
class NullLLMBackend(LLMBackend):
    """Always raises so callers fail fast with a clear message."""

    name = "null"
    available = False

    def __init__(self, reason: str) -> None:
        # Explanation surfaced to the caller whenever complete() is invoked.
        self._reason = reason

    def complete(self, prompt: str,
                 system: Optional[str] = None,
                 model: Optional[str] = None,
                 max_tokens: int = 2048) -> str:
        # Every argument is deliberately ignored: this backend can never
        # answer, so it raises with the configured reason instead.
        del prompt, system, model, max_tokens
        raise LLMNotAvailableError(self._reason)

0 commit comments

Comments
 (0)