|
33 | 33 | from pydantic_graph import End |
34 | 34 |
|
35 | 35 | from opencontractserver.constants.context_guardrails import COMPACTION_SUMMARY_PREFIX |
| 36 | +from opencontractserver.constants.llm import STRUCTURED_OUTPUT_RETRIES |
36 | 37 | from opencontractserver.conversations.models import Conversation |
37 | 38 | from opencontractserver.corpuses.models import Corpus |
38 | 39 | from opencontractserver.documents.models import Document |
|
90 | 91 | PydanticAIAnnotationVectorStore, |
91 | 92 | ) |
92 | 93 | from opencontractserver.utils.embeddings import aget_embedder |
| 94 | +from opencontractserver.utils.llm import is_anthropic_model |
93 | 95 | from opencontractserver.utils.prompt_sanitization import ( |
94 | 96 | UNTRUSTED_CONTENT_NOTICE, |
95 | 97 | fence_user_content, |
@@ -536,10 +538,20 @@ def _build_structured_system_prompt( |
536 | 538 | Subclasses may override this to include document or corpus context. |
537 | 539 | The base implementation intentionally avoids any citation or |
538 | 540 | conversational guidance to minimize iterations and enforce raw output. |
| 541 | +
|
| 542 | + The wording explicitly tells the agent to commit to the final |
| 543 | + structured response after gathering information. Some models (notably |
| 544 | + Anthropic's Claude family) tend to keep narrating or invoking tools |
| 545 | + instead of producing the structured output unless told to stop. See |
| 546 | + issue #1381. |
539 | 547 | """ |
540 | 548 | return ( |
541 | 549 | "You are in data extraction mode.\n" |
542 | 550 | "Use available tools to locate the requested information.\n" |
| 551 | + "After gathering enough information from the tools, you MUST " |
| 552 | + "produce the final structured response by calling the result " |
| 553 | + "tool. Do not narrate further; do not keep invoking tools " |
| 554 | + "indefinitely.\n" |
543 | 555 | "Return ONLY the raw value matching the target type. " |
544 | 556 | "No explanations, no citations, no extra words.\n\n" |
545 | 557 | "SEARCH PROTOCOL:\n" |
@@ -1358,13 +1370,40 @@ async def _structured_response_raw( |
1358 | 1370 | ) |
1359 | 1371 |
|
1360 | 1372 | try: |
1361 | | - # Build model settings with overrides |
| 1373 | + # Build model settings with overrides. |
| 1374 | + # ``_prepare_pydantic_ai_model_settings`` returns ``None`` when |
| 1375 | + # both temperature and max_tokens are unset on ``self.config`` |
| 1376 | + # (the helper signals "no settings to pass" rather than |
| 1377 | + # returning an empty dict). We need a mutable dict here so |
| 1378 | + # the function-level ``temperature`` / ``max_tokens`` overrides |
| 1379 | + # — and the Anthropic temperature-0 nudge below — have |
| 1380 | + # somewhere to land. |
1362 | 1381 | model_settings = _prepare_pydantic_ai_model_settings(self.config) |
| 1382 | + if model_settings is None: |
| 1383 | + model_settings = {} |
1363 | 1384 | if temperature is not None: |
1364 | 1385 | model_settings["temperature"] = temperature |
1365 | 1386 | if max_tokens is not None: |
1366 | 1387 | model_settings["max_tokens"] = max_tokens |
1367 | 1388 |
|
| 1389 | + # Anthropic models tend to keep narrating / calling tools instead |
| 1390 | + # of committing to the structured output when given any wiggle |
| 1391 | + # room (issue #1381). Force temperature down to 0 unless the |
| 1392 | + # caller explicitly asked for something else (function-level |
| 1393 | + # temperature pin OR an explicit config.temperature). |
| 1394 | + effective_model = model or self.config.model_name |
| 1395 | + if ( |
| 1396 | + is_anthropic_model(effective_model) |
| 1397 | + and temperature is None |
| 1398 | + and self.config.temperature is None |
| 1399 | + ): |
| 1400 | + logger.info( |
| 1401 | + "Forcing temperature=0 for structured extraction with " |
| 1402 | + "Anthropic model %s (issue #1381).", |
| 1403 | + effective_model, |
| 1404 | + ) |
| 1405 | + model_settings["temperature"] = 0 |
| 1406 | + |
1368 | 1407 | # Seed tools from the main agent so the structured run has the same capabilities |
1369 | 1408 | seeded_tools_dict = _get_function_tools(self.pydantic_ai_agent) |
1370 | 1409 | seeded_tools = list(seeded_tools_dict.values()) |
@@ -1394,13 +1433,20 @@ async def _structured_response_raw( |
1394 | 1433 |
|
1395 | 1434 | logger.info(f"Structured system prompt: {structured_system_prompt}") |
1396 | 1435 |
|
| 1436 | + # Preserve the pre-issue-#1381 behaviour of passing |
| 1437 | + # ``model_settings=None`` to ``PydanticAIAgent`` when nothing |
| 1438 | + # ended up being set, so non-Anthropic structured runs without |
| 1439 | + # caller pins receive the same ``model_settings`` as before. |
1397 | 1440 | structured_agent = PydanticAIAgent( |
1398 | | - model=model or self.config.model_name, |
| 1441 | + model=effective_model, |
1399 | 1442 | instructions=structured_system_prompt, |
1400 | 1443 | output_type=target_type, |
1401 | 1444 | deps_type=PydanticAIDependencies, |
1402 | 1445 | tools=final_tools, |
1403 | | - model_settings=model_settings, |
| 1446 | + model_settings=model_settings or None, |
| 1447 | + # Give pydantic-ai room to retry the structured output when |
| 1448 | + # the model fails to commit on the first pass (issue #1381). |
| 1449 | + output_retries=STRUCTURED_OUTPUT_RETRIES, |
1404 | 1450 | ) |
1405 | 1451 |
|
1406 | 1452 | # Include prior conversation context if available |
@@ -1995,8 +2041,12 @@ def _build_structured_system_prompt( |
1995 | 2041 | "search for likely answer phrasings). A single failed search is NOT " |
1996 | 2042 | "sufficient evidence that the information is missing — most legal " |
1997 | 2043 | "documents need multiple targeted queries to surface a relevant span.\n" |
1998 | | - "4. Return ONLY the raw extracted value matching the target type.\n" |
1999 | | - "5. No explanations, no citations, no commentary – just the data.\n\n" |
| 2044 | + "4. After gathering enough information from the tools, you MUST " |
| 2045 | + "commit to the final structured response by calling the result " |
| 2046 | + "tool. Do not narrate further; do not keep invoking tools " |
| 2047 | + "indefinitely.\n" |
| 2048 | + "5. Return ONLY the raw extracted value matching the target type.\n" |
| 2049 | + "6. No explanations, no citations, no commentary – just the data.\n\n" |
2000 | 2050 | "Only return null/None after multiple search attempts have all " |
2001 | 2051 | "failed to find relevant content." |
2002 | 2052 | ) |
@@ -2509,8 +2559,12 @@ def _build_structured_system_prompt( |
2509 | 2559 | "search for likely answer phrasings). A single failed search is NOT " |
2510 | 2560 | "sufficient evidence that the information is missing — most legal " |
2511 | 2561 | "documents need multiple targeted queries to surface a relevant span.\n" |
2512 | | - "4. Return ONLY the raw extracted value matching the target type.\n" |
2513 | | - "5. No explanations, no citations, no commentary – just the data.\n\n" |
| 2562 | + "4. After gathering enough information from the tools, you MUST " |
| 2563 | + "commit to the final structured response by calling the result " |
| 2564 | + "tool. Do not narrate further; do not keep invoking tools " |
| 2565 | + "indefinitely.\n" |
| 2566 | + "5. Return ONLY the raw extracted value matching the target type.\n" |
| 2567 | + "6. No explanations, no citations, no commentary – just the data.\n\n" |
2514 | 2568 | "Only return null/None after multiple search attempts have all " |
2515 | 2569 | "failed to find relevant content." |
2516 | 2570 | ) |
|
0 commit comments