Skip to content

Commit f057912

Browse files
committed
Address review: simplify temperature condition, document temperature pin, add boundary test
- Drop redundant 'config.temperature == 0' clause in the Anthropic temperature override; the inverse intent ('only force when caller did not pin a value') is now expressed cleanly with a single is-None check.
- Document why the temperature=0.3 pin in doc_extract_query_task bypasses the Anthropic override today (model is also OpenAI-pinned) and what to gate on if the model becomes column-configurable.
- Add ClassifyNoneResultTests.test_repeats_below_threshold_are_not_tool_loop to pin the _TOOL_LOOP_THRESHOLD - 1 boundary.
1 parent f55c763 commit f057912

3 files changed

Lines changed: 23 additions & 3 deletions

File tree

opencontractserver/llms/agents/pydantic_ai_agents.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,12 +1352,12 @@ async def _structured_response_raw(
13521352
# of committing to the structured output when given any wiggle
13531353
# room (issue #1381). Force temperature down to 0 unless the
13541354
# caller explicitly asked for something else (function-level
1355-
# temperature pin OR a non-zero config.temperature).
1355+
# temperature pin OR an explicit config.temperature).
13561356
effective_model = model or self.config.model_name
13571357
if (
13581358
_is_anthropic_model(effective_model)
13591359
and temperature is None
1360-
and (self.config.temperature is None or self.config.temperature == 0)
1360+
and self.config.temperature is None
13611361
):
13621362
logger.info(
13631363
"Forcing temperature=0 for structured extraction with "

opencontractserver/tasks/data_extract_tasks.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,14 @@ def sync_add_sources(datacell, sources):
390390
prompt=prompt,
391391
target_type=output_type,
392392
framework=AgentFramework.PYDANTIC_AI,
393-
temperature=0.3, # Low temperature for consistent extraction
393+
# Low temperature for consistent extraction. Note: this
394+
# function-level pin bypasses the Anthropic temperature=0
395+
# override in `_structured_response_raw` (issue #1381).
396+
# Safe today because the model is also pinned to
397+
# `openai:gpt-4o-mini`; if the model becomes column-
398+
# configurable, gate this temperature on the model family
399+
# so Claude variants still get the override.
400+
temperature=0.3,
394401
similarity_top_k=similarity_top_k,
395402
model="openai:gpt-4o-mini", # Fast and reliable
396403
user_id=datacell.creator.id,

opencontractserver/tests/test_data_extract_failure_classification.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,19 @@ def test_repeated_tool_call_classifies_as_tool_loop(self) -> None:
8686
]
8787
self.assertEqual(_classify_none_result(messages), NONE_RESULT_TOOL_LOOP)
8888

89+
def test_repeats_below_threshold_are_not_tool_loop(self) -> None:
90+
"""Two repeats (threshold - 1) ⇒ no_final_response, not tool_loop.
91+
92+
Pins the boundary so a future tweak of ``_TOOL_LOOP_THRESHOLD``
93+
forces this test to be updated explicitly.
94+
"""
95+
repeated = _tool_call("similarity_search", {"query": "same"})
96+
messages = [
97+
_make_response(repeated),
98+
_make_response(repeated),
99+
]
100+
self.assertEqual(_classify_none_result(messages), NONE_RESULT_NO_FINAL)
101+
89102
def test_loop_then_final_is_committed_not_loop(self) -> None:
90103
"""If the agent eventually commits, that wins over loop detection."""
91104
repeated = _tool_call("similarity_search", {"query": "loop"})

0 commit comments

Comments
 (0)