|
1 | | -""" |
2 | | -LLM / agent integration constants. |
| 1 | +"""LLM / agent integration constants (issue #1381).""" |
3 | 2 |
|
4 | | -Anthropic structured-extraction reliability knobs and the failure-mode |
5 | | -classifier vocabulary that ``data_extract_tasks._classify_none_result`` |
6 | | -emits to ``Datacell.stacktrace``. Lives here per CLAUDE.md "no magic |
7 | | -numbers in business code" rule so operators can grep canonical values |
8 | | -instead of chasing literals across modules. |
9 | | -""" |
10 | | - |
11 | | -from typing import Optional |
12 | | - |
13 | | -# Retry budget passed to ``PydanticAIAgent`` for structured extraction. |
14 | | -# pydantic-ai's default is 1; Claude/Anthropic models routinely fail to |
15 | | -# call ``final_result`` on the first turn for sparse documents and we |
16 | | -# observed an ~85% failure rate without retries. 3 strikes the right |
17 | | -# balance: enough to absorb a single missed-tool-call attempt with a |
18 | | -# follow-up reminder, without blowing the per-cell wall-clock budget. |
| 3 | +# pydantic-ai's default is 1; Anthropic models often need retries to commit |
| 4 | +# to ``final_result``. Bumping this requires re-checking ``TOOL_LOOP_THRESHOLD`` |
| 5 | +# below — a legitimate retried tool call could be mis-classified as a loop |
| 6 | +# if the threshold is lower than the retry budget. |
19 | 7 | STRUCTURED_OUTPUT_RETRIES = 3 |
20 | 8 |
|
21 | | -# Threshold for declaring a tool loop in ``_classify_none_result``. If |
22 | | -# the same ``(tool_name, args)`` signature appears at least this many |
23 | | -# times in the captured pydantic-ai message log without a matching |
24 | | -# ``final_result`` call, the cell is classified as |
25 | | -# ``NONE_RESULT_TOOL_LOOP`` (integration failure, NOT a "data absent" |
26 | | -# answer). Distinct from ``STRUCTURED_OUTPUT_RETRIES`` despite the |
27 | | -# coincidental equality — the retry budget is a pydantic-ai input, |
28 | | -# this threshold is a post-mortem heuristic. |
| 9 | +# Same-call repetition count that ``_classify_none_result`` treats as a |
| 10 | +# pipeline bug (post-mortem heuristic, not a pydantic-ai input). Keep |
| 11 | +# >= STRUCTURED_OUTPUT_RETRIES so legitimate retries don't trip it. |
29 | 12 | TOOL_LOOP_THRESHOLD = 3 |
30 | 13 |
|
31 | | -# Failure-mode classification vocabulary written to ``Datacell.stacktrace`` |
32 | | -# when extraction returns ``None``. Operators grep ``failure_mode=`` to |
33 | | -# separate legitimate "data not present" outcomes from pipeline bugs; |
34 | | -# changing these strings is a breaking change for downstream dashboards. |
| 14 | +# Vocabulary written to ``Datacell.stacktrace`` as ``failure_mode=...``. |
| 15 | +# Operators grep these; changing the strings breaks downstream dashboards. |
35 | 16 | NONE_RESULT_AGENT_COMMITTED = "agent_committed_none" |
36 | 17 | NONE_RESULT_NO_FINAL = "no_final_response" |
37 | 18 | NONE_RESULT_TOOL_LOOP = "tool_loop_no_output" |
38 | 19 | NONE_RESULT_UNKNOWN = "unknown" |
39 | 20 |
|
40 | | -# Default model for ``doc_extract_query_task``. Co-located with |
41 | | -# ``EXTRACT_DEFAULT_TEMPERATURE`` and the ``is_anthropic_model`` helper |
42 | | -# below so the model/family/temperature relationship lives in one place. |
43 | | -# Call sites must pass ``temperature=None`` when the model family is |
44 | | -# Anthropic so the structured-extraction guard in |
45 | | -# ``_structured_response_raw`` can apply ``temperature=0`` automatically |
46 | | -# (issue #1381). |
47 | 21 | EXTRACT_DEFAULT_MODEL = "openai:gpt-4o-mini" |
48 | 22 | EXTRACT_DEFAULT_TEMPERATURE = 0.3 |
49 | | - |
50 | | - |
51 | | -def is_anthropic_model(model_name: Optional[str]) -> bool: |
52 | | - """Return True if ``model_name`` looks like an Anthropic / Claude model. |
53 | | -
|
54 | | - Accepts both pydantic-ai-style ``"anthropic:..."`` prefixes and bare |
55 | | - model names containing ``"claude"``. Lives in ``constants/llm.py`` |
56 | | - rather than in an agent module because call sites outside the agents |
57 | | - layer (notably ``data_extract_tasks.doc_extract_query_task``) need to |
58 | | - decide whether to pass ``temperature=None`` so the Anthropic guard in |
59 | | - ``_structured_response_raw`` activates. Pure stateless string check — |
60 | | - no imports beyond ``typing``. |
61 | | - """ |
62 | | - if not model_name: |
63 | | - return False |
64 | | - name = model_name.lower() |
65 | | - return name.startswith("anthropic:") or "claude" in name |
0 commit comments