Skip to content

Commit 81b4c68

Browse files
committed
Address review: clarify control flow, respect config.temperature, durable error msgs
- Initialize llm_log/messages at top of try-block so outer except branches don't depend on locals() introspection - Replace 'X in locals()' guards with simple None checks - Anthropic temperature override now respects a non-zero config.temperature instead of silently overriding - Failure messages reference llm_call_log instead of issue #1381 (durable phrasing) - Tool-loop fingerprinting uses json.dumps(sort_keys=True, default=str) before falling back to repr - Test updated to match the durable phrasing
1 parent d7487dd commit 81b4c68

3 files changed

Lines changed: 30 additions & 23 deletions

File tree

opencontractserver/llms/agents/pydantic_ai_agents.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,15 +1351,19 @@ async def _structured_response_raw(
13511351
# Anthropic models tend to keep narrating / calling tools instead
13521352
# of committing to the structured output when given any wiggle
13531353
# room (issue #1381). Force temperature down to 0 unless the
1354-
# caller explicitly asked for something else.
1354+
# caller explicitly asked for something else (function-level
1355+
# temperature pin OR a non-zero config.temperature).
13551356
effective_model = model or self.config.model_name
1356-
if _is_anthropic_model(effective_model) and temperature is None:
1357-
if self.config.temperature is None or self.config.temperature > 0:
1358-
logger.info(
1359-
"Forcing temperature=0 for structured extraction with "
1360-
"Anthropic model %s (issue #1381).",
1361-
effective_model,
1362-
)
1357+
if (
1358+
_is_anthropic_model(effective_model)
1359+
and temperature is None
1360+
and (self.config.temperature is None or self.config.temperature == 0)
1361+
):
1362+
logger.info(
1363+
"Forcing temperature=0 for structured extraction with "
1364+
"Anthropic model %s (issue #1381).",
1365+
effective_model,
1366+
)
13631367
model_settings["temperature"] = 0
13641368

13651369
# Seed tools from the main agent so the structured run has the same capabilities

opencontractserver/tasks/data_extract_tasks.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,11 @@ def _classify_none_result(messages: Optional[list[Any]]) -> str:
8383
if tool_name.startswith("final_result"):
8484
saw_final_result = True
8585
else:
86-
args_repr = repr(getattr(part, "args", None))
86+
raw_args = getattr(part, "args", None)
87+
try:
88+
args_repr = json.dumps(raw_args, sort_keys=True, default=str)
89+
except (TypeError, ValueError):
90+
args_repr = repr(raw_args)
8791
tool_call_signatures.append((tool_name, args_repr))
8892

8993
if saw_final_result:
@@ -113,13 +117,15 @@ def _failure_message_for_classification(classification: str) -> str:
113117
"The extraction agent never produced a final structured response. "
114118
"This is an integration failure (the model exhausted its tool-use "
115119
"budget without committing to the result tool), not a statement "
116-
"about the document. See issue #1381."
120+
"about the document. Check ``llm_call_log`` for the raw message "
121+
"history."
117122
)
118123
if classification == NONE_RESULT_TOOL_LOOP:
119124
return (
120125
"The extraction agent looped on the same tool call without "
121126
"producing a final structured response. This is an integration "
122-
"failure, not a statement about the document. See issue #1381."
127+
"failure, not a statement about the document. Check "
128+
"``llm_call_log`` for the repeated tool call."
123129
)
124130
return (
125131
"The extraction returned None and the cause could not be classified. "
@@ -269,8 +275,9 @@ def sync_add_sources(datacell, sources):
269275
if annotation_ids:
270276
datacell.sources.add(*annotation_ids)
271277

272-
# Initialize datacell to None to avoid UnboundLocalError
278+
# Initialize to None to avoid UnboundLocalError in the outer except block
273279
datacell = None
280+
llm_log: Optional[str] = None
274281

275282
logger.info("=" * 60)
276283
logger.info(f"doc_extract_query_task STARTED for cell_id: {cell_id}")
@@ -371,7 +378,7 @@ def sync_add_sources(datacell, sources):
371378
logger.info(f" - Corpus ID: {corpus_id}")
372379

373380
# Capture LLM messages for debugging
374-
llm_log = None
381+
messages: Optional[list[Any]] = None
375382

376383
try:
377384
# Wrap the agent call in the context manager to capture messages
@@ -396,7 +403,7 @@ def sync_add_sources(datacell, sources):
396403

397404
except Exception as e:
398405
# If we have messages, capture them before re-raising
399-
if "messages" in locals():
406+
if messages:
400407
llm_log = ModelMessagesTypeAdapter.dump_json(
401408
messages, indent=2
402409
).decode()
@@ -454,9 +461,7 @@ def sync_add_sources(datacell, sources):
454461
# Extraction returned None — classify *why* so operators can
455462
# distinguish legitimate "data not present" outcomes from
456463
# pipeline bugs (issue #1381).
457-
classification = _classify_none_result(
458-
messages if "messages" in locals() else None
459-
)
464+
classification = _classify_none_result(messages)
460465
failure_message = _failure_message_for_classification(classification)
461466
logger.warning(
462467
f"✗ Extraction returned None for cell {cell_id} "
@@ -477,9 +482,7 @@ def sync_add_sources(datacell, sources):
477482
# Only try to mark failed if we have a datacell
478483
if datacell:
479484
# Pass llm_log if we have it
480-
await sync_mark_failed(
481-
datacell, e, tb, llm_log if "llm_log" in locals() else None
482-
)
485+
await sync_mark_failed(datacell, e, tb, llm_log)
483486
else:
484487
logger.error(f"Failed to get datacell for cell_id {cell_id}: {e}\n{tb}")
485488
raise

opencontractserver/tests/test_data_extract_failure_classification.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,12 @@ def test_messages_are_distinct_per_classification(self) -> None:
131131
self.assertIn("integration failure", messages[NONE_RESULT_NO_FINAL])
132132
self.assertIn("looped", messages[NONE_RESULT_TOOL_LOOP])
133133

134-
def test_integration_failure_messages_reference_issue(self) -> None:
135-
"""Operators need to find the GitHub issue from the cell stacktrace."""
134+
def test_integration_failure_messages_reference_log(self) -> None:
135+
"""Operators need a pointer to the raw conversation in the cell stacktrace."""
136136
for classification in (NONE_RESULT_NO_FINAL, NONE_RESULT_TOOL_LOOP):
137137
with self.subTest(classification=classification):
138138
self.assertIn(
139-
"#1381",
139+
"llm_call_log",
140140
_failure_message_for_classification(classification),
141141
)
142142

0 commit comments

Comments
 (0)