Address review: simplify temperature condition, document temperature pin, add boundary test

JSv4 · JSv4 · commit f0579122e6ac · 2026-04-28T00:50:01.000-05:00
- Drop the redundant 'config.temperature == 0' clause from the Anthropic
  temperature override: forcing 0 when the config already says 0 leaves the
  effective temperature unchanged, so a single is-None check now expresses the intent ('only force when the caller did not pin a value').
- Document that the temperature=0.3 pin in doc_extract_query_task
  bypasses the Anthropic override, why that is harmless today (the model
  is also pinned to OpenAI), and what to gate on if the model becomes column-configurable.
- Add ClassifyNoneResultTests.test_repeats_below_threshold_are_not_tool_loop
  to pin the _TOOL_LOOP_THRESHOLD - 1 boundary.
diff --git a/opencontractserver/llms/agents/pydantic_ai_agents.py b/opencontractserver/llms/agents/pydantic_ai_agents.py
@@ -1352,12 +1352,12 @@ async def _structured_response_raw(
             # of committing to the structured output when given any wiggle
             # room (issue #1381). Force temperature down to 0 unless the
             # caller explicitly asked for something else (function-level
-            # temperature pin OR a non-zero config.temperature).
+            # temperature pin OR an explicit config.temperature).
             effective_model = model or self.config.model_name
             if (
                 _is_anthropic_model(effective_model)
                 and temperature is None
-                and (self.config.temperature is None or self.config.temperature == 0)
+                and self.config.temperature is None
             ):
                 logger.info(
                     "Forcing temperature=0 for structured extraction with "
diff --git a/opencontractserver/tasks/data_extract_tasks.py b/opencontractserver/tasks/data_extract_tasks.py
@@ -390,7 +390,14 @@ def sync_add_sources(datacell, sources):
                     prompt=prompt,
                     target_type=output_type,
                     framework=AgentFramework.PYDANTIC_AI,
-                    temperature=0.3,  # Low temperature for consistent extraction
+                    # Low temperature for consistent extraction. Note: this
+                    # function-level pin bypasses the Anthropic temperature=0
+                    # override in `_structured_response_raw` (issue #1381).
+                    # Safe today because the model is also pinned to
+                    # `openai:gpt-4o-mini`; if the model becomes column-
+                    # configurable, gate this temperature on the model family
+                    # so Claude variants still get the override.
+                    temperature=0.3,
                     similarity_top_k=similarity_top_k,
                     model="openai:gpt-4o-mini",  # Fast and reliable
                     user_id=datacell.creator.id,
diff --git a/opencontractserver/tests/test_data_extract_failure_classification.py b/opencontractserver/tests/test_data_extract_failure_classification.py
@@ -86,6 +86,19 @@ def test_repeated_tool_call_classifies_as_tool_loop(self) -> None:
         ]
         self.assertEqual(_classify_none_result(messages), NONE_RESULT_TOOL_LOOP)
 
+    def test_repeats_below_threshold_are_not_tool_loop(self) -> None:
+        """Two repeats (threshold - 1) ⇒ no_final_response, not tool_loop.
+
+        Pins the boundary at today's ``_TOOL_LOOP_THRESHOLD``: lowering it
+        fails this test, but raising it does not, so update the count then.
+        """
+        repeated = _tool_call("similarity_search", {"query": "same"})
+        messages = [
+            _make_response(repeated),
+            _make_response(repeated),
+        ]
+        self.assertEqual(_classify_none_result(messages), NONE_RESULT_NO_FINAL)
+
     def test_loop_then_final_is_committed_not_loop(self) -> None:
         """If the agent eventually commits, that wins over loop detection."""
         repeated = _tool_call("similarity_search", {"query": "loop"})