From 31c0ee9bb2d3fb4e4a24660a715a8846d5c15d23 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 20:32:40 -0500
Subject: [PATCH 1/7] fix(orchestration): harden context token tracking and
 tool-result growth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Default MaxInTurnToolPairs to 12 when MaxContextTokens is set; without
  this, tool results accumulate O(N²) within a turn for budgeted agents
  that omit explicit in-turn limits
- ShouldCompact (window mode) uses Usage.TotalTokens when available so
  reasoning tokens stripped from Content are counted toward the budget
- AccumulateCompactedUsage folds all compacted-turn costs onto the summary
  message across every compaction path, keeping MaxTotalTokens accurate
  after resume cycles
- TransitionAlreadyFired distinguishes blocked markers from fired ones so
  an A→B contract failure no longer suppresses the independent A→C signal
---
 src/Infrastructure/AgentFactory.cs            |  7 +++-
 src/Orchestration/ConversationCompactor.cs    | 40 +++++++++++++++----
 .../StateMachineSelectionStrategy.cs          | 31 ++++++++++----
 3 files changed, 61 insertions(+), 17 deletions(-)
diff --git a/src/Infrastructure/AgentFactory.cs b/src/Infrastructure/AgentFactory.cs
index 5a0da53..d3cde28 100644
--- a/src/Infrastructure/AgentFactory.cs
+++ b/src/Infrastructure/AgentFactory.cs
@@ -171,7 +171,12 @@ public AIAgent Create(AgentConfig config, Action<string, string, string?>? onToo
         // Deterministic sliding-window cap: always keep only the last N tool call/result
         // pairs in full, replacing older ones with placeholders unconditionally.
         // Applied before the budget-reactive trim so the window runs first.
-        var maxInTurnToolPairs = config.MaxInTurnToolPairs;
+        // When MaxContextTokens is set but no explicit pair limit is configured, default
+        // to 12 pairs to prevent O(N²) tool-result accumulation within a turn.
+        const int DefaultToolPairsWhenBudgeted = 12;
+        var maxInTurnToolPairs = config.MaxInTurnToolPairs > 0
+            ? config.MaxInTurnToolPairs
+            : (resolvedModel.MaxContextTokens > 0 ? DefaultToolPairsWhenBudgeted : 0);
 
         // Tool schema overhead: computed once at build time since the tool list is fixed
         // for the lifetime of this agent. Included in the context budget and payload
diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs
index 8402c15..86713a5 100644
--- a/src/Orchestration/ConversationCompactor.cs
+++ b/src/Orchestration/ConversationCompactor.cs
@@ -58,7 +58,12 @@ public bool ShouldCompact(IReadOnlyList<AgentMessage> messages)
     {
         if (IsWindowMode)
         {
-            var estimated = messages.Sum(m => (m.Content?.Length ?? 0) / 4);
+            // Prefer provider-reported token counts when available — they include reasoning
+            // tokens that TruncateIntermediateAssistantReasoning strips from Content, so
+            // the char-based estimate would undercount them. Fall back to chars/4 only for
+            // messages that have no Usage record (e.g. injected system messages).
+            var estimated = messages.Sum(m =>
+                m.Usage is { } u ? u.TotalTokens : (m.Content?.Length ?? 0) / 4);
             if (estimated > config.TokenBudget)
             {
                 logger.LogDebug(
@@ -162,6 +167,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                     toCompact[0].TurnIndex, toCompact[^1].TurnIndex, cancellationToken);
                 var intentSummary = BuildIntentDerivedSummary(
                     toCompact[0].TurnIndex, toCompact[^1].TurnIndex, intents, prefixBlock);
+                intentSummary = intentSummary with { Usage = AccumulateCompactedUsage(toCompact, null) };
                 logger.LogInformation(
                     "Intent compaction: {Compacted} turns replaced by intent log reconstruction ({IntentCount} intents).",
                     toCompact.Count, intents.Count);
@@ -185,6 +191,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 };
             if (ExpandedNote is not null)
                 reconstructed = reconstructed with { Content = reconstructed.Content + "\n\n---\n" + ExpandedNote };
+            reconstructed = reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) };
             logger.LogInformation(
                 "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.",
                 toCompact.Count);
@@ -215,9 +222,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                     Role                = "user",
                     TurnIndex           = toCompact[^1].TurnIndex,
                     IsCompactionSummary = true,
-                    Usage               = summUsage is not null
-                        ? new TokenUsage(summUsage.InputTokens, summUsage.OutputTokens)
-                        : null
+                    Usage               = AccumulateCompactedUsage(toCompact, summUsage)
                 };
 
                 logger.LogInformation(
@@ -231,7 +236,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 // LLM summary failed; return the lossless reconstruction alone so the session survives.
                 logger.LogError(ex,
                     "Hybrid compaction: LLM summary call failed — returning lossless reconstruction only.");
-                return (reconstructed, toRetain);
+                return (reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) }, toRetain);
             }
         }
 
@@ -256,9 +261,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 Role                = "user",
                 TurnIndex           = toCompact[^1].TurnIndex,
                 IsCompactionSummary = true,
-                Usage               = summaryUsage is not null
-                    ? new TokenUsage(summaryUsage.InputTokens, summaryUsage.OutputTokens)
-                    : null
+                Usage               = AccumulateCompactedUsage(toCompact, summaryUsage)
             };
 
             logger.LogInformation(
@@ -279,6 +282,27 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
 
     // Internals
 
+    // Sums the token costs of all compacted turns and folds in the summary-call cost.
+    // The total is stored on the summary AgentMessage so AgentOrchestrator can seed
+    // cumulativeTokens correctly on the next StreamAsync call (after resume/compaction),
+    // keeping MaxTotalTokens enforcement accurate across compaction boundaries.
+    private static TokenUsage? AccumulateCompactedUsage(
+        IReadOnlyList<AgentMessage> compacted,
+        TokenUsage? summaryCallUsage)
+    {
+        // Start from the summary call's own cost; both counters are zero when no call was made.
+        int inputSum  = summaryCallUsage is null ? 0 : summaryCallUsage.InputTokens;
+        int outputSum = summaryCallUsage is null ? 0 : summaryCallUsage.OutputTokens;
+        foreach (var turn in compacted)
+        {
+            // Turns that carry no usage record contribute nothing to either counter.
+            if (turn.Usage is not { } spent) continue;
+            inputSum  += spent.InputTokens;
+            outputSum += spent.OutputTokens;
+        }
+        return inputSum > 0 || outputSum > 0 ? new TokenUsage(inputSum, outputSum) : null;
+    }
+
     private AgentMessage BuildIntentDerivedSummary(
         int firstTurn,
         int lastTurn,
diff --git a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs
index 9c67e3b..122ef71 100644
--- a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs
+++ b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs
@@ -772,21 +772,36 @@ private static bool IsSignalOnOwnLine(string content, string signal)
         return false;
     }
 
-    // Returns true when a turn-boundary marker already exists after keywordIndex for
-    // the target state's agent — meaning this signal was consumed in a prior turn.
+    // Returns true when this specific transition was already consumed after signalIndex.
+    //
+    // Two marker types are checked:
+    //   "[fuseraft:blocked {state}→{targetState}]" — the transition was evaluated and
+    //     its contract failed; the signal must not be re-evaluated for that target.
+    //     Markers for OTHER targets do not suppress this transition.
+    //   Any other "[fuseraft: ...]" — a different transition fired, meaning the state
+    //     machine already advanced; the signal is consumed regardless of target.
     private static bool TransitionAlreadyFired(IList<ChatMessage> history, int signalIndex, string targetState)
     {
-        // We look for "[fuseraft: X → Y]" markers after the signal message.
-        // Since we don't know the target agent name from here (only the target state),
-        // we use a simplified check: any turn-boundary marker after this index means
-        // the selector already processed this turn.
         for (int j = signalIndex + 1; j < history.Count; j++)
         {
             var m = history[j];
             if (m.Role != ChatRole.User) continue;
             var text = m.Text;
-            if (!string.IsNullOrEmpty(text) && text.StartsWith("[fuseraft:", StringComparison.Ordinal))
-                return true;
+            if (string.IsNullOrEmpty(text)) continue;
+            if (!text.StartsWith("[fuseraft:", StringComparison.Ordinal)) continue;
+
+            // "[fuseraft:blocked A→B]" suppresses only A→B, never A→C. The target name must end
+            // at a non-identifier character so "→Review" is not matched by "→ReviewFix".
+            if (text.StartsWith("[fuseraft:blocked ", StringComparison.Ordinal))
+            {
+                int end = text.IndexOf($"→{targetState}", StringComparison.OrdinalIgnoreCase);
+                if (end >= 0) end += targetState.Length + 1; // index of first char after the target name
+                if (end > 0 && (end == text.Length || !(char.IsLetterOrDigit(text[end]) || text[end] is '_' or '-')))
+                    return true;
+                continue; // Different target — does not apply to this transition.
+            }
+            // Any other marker: the state machine already advanced past this signal.
+            return true;
         }
         return false;
     }

From 7a5380750cf41095c953b2abee851eb27ce6be92 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 20:38:11 -0500
Subject: [PATCH 2/7] fix(orchestration): pin corrections through trim and
 context assembly

- Corrections injected mid-session were silently dropped for Context-spec
  agents because AssembleForAgentAsync bypasses shared-history replay;
  ExtractPendingCorrections re-injects any correction messages that appear
  after the agent's last assistant turn so retries always see the feedback
- MaxTailMessages cut by raw position, so earlier corrections on long
  histories could fall outside the retained window; step 4 now pins
  correction messages and applies the tail limit only to non-pinned
  messages, preserving original order
- IsCorrectionMessage is public so both paths share the same detection
  logic (prefix table + [fuseraft:blocked substring)
---
 src/Orchestration/ContextWindowFilter.cs    | 68 ++++++++++++++++++++-
 src/Orchestration/HandoffContextResolver.cs | 37 +++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs
index 4809188..0643319 100644
--- a/src/Orchestration/ContextWindowFilter.cs
+++ b/src/Orchestration/ContextWindowFilter.cs
@@ -118,8 +118,37 @@ public static IReadOnlyList<ChatMessage> Apply(
         }
 
         // Step 4: Tail limit — keep only the last N messages.
+        // Correction messages (RETRY, STAGNATION, [fuseraft:blocked, etc.) are pinned so they
+        // always survive the position-based cut. Non-correction messages are trimmed to the tail
+        // window; the final list preserves original message order.
         if (window.MaxTailMessages > 0 && list.Count > window.MaxTailMessages)
-            list = list.Skip(list.Count - window.MaxTailMessages).ToList();
+        {
+            var pinnedSet = new HashSet<int>(
+                Enumerable.Range(0, list.Count).Where(i => IsCorrectionMessage(list[i])));
+
+            if (pinnedSet.Count == 0)
+            {
+                list = list.Skip(list.Count - window.MaxTailMessages).ToList();
+            }
+            else
+            {
+                var unpinnedIndices = Enumerable.Range(0, list.Count)
+                    .Where(i => !pinnedSet.Contains(i))
+                    .ToList();
+
+                int firstKeptUnpinned = unpinnedIndices.Count > window.MaxTailMessages
+                    ? unpinnedIndices[unpinnedIndices.Count - window.MaxTailMessages]
+                    : 0;
+
+                var kept = new List<ChatMessage>(list.Count);
+                for (int i = 0; i < list.Count; i++)
+                {
+                    if (i >= firstKeptUnpinned || pinnedSet.Contains(i))
+                        kept.Add(list[i]);
+                }
+                list = kept;
+            }
+        }
 
         // Step 5: Sanitize tool_use/tool_result pairing at slice boundaries.
         // Steps 3 and 4 cut by position; either cut can land inside a tool-call/result
@@ -342,6 +371,43 @@ private static List<ChatMessage> SanitizeToolPairs(List<ChatMessage> list)
         return result;
     }
 
+    // Prefixes that unambiguously identify a ChatRole.User correction injected by
+    // CorrectionEngine, routing strategies, or the orchestrator's verifier hook.
+    private static readonly string[] CorrectionPrefixes =
+    [
+        "RETRY ",
+        "NO TOOL CALLS",
+        "CRITICAL:",
+        "APPROVED rejected:",
+        "WRONG KEYWORD:",
+        "JSON block correct",
+        "BUILD FAILURE:",
+        "STAGNATION (",
+        "STUCK ",
+        "HALLUCINATION:",
+        "PERSISTENT BUILD FAILURE",
+        "VERIFICATION FINDING",
+        "Files written this turn",
+        "No handoff keyword",
+    ];
+
+    /// <summary>
+    /// Determines whether <paramref name="message"/> is a correction: a
+    /// <see cref="ChatRole.User"/> message whose text contains <c>[fuseraft:blocked</c>
+    /// or starts with one of the <see cref="CorrectionPrefixes"/> entries (ordinal,
+    /// case-sensitive). Shared by the <see cref="ContextWindowConfig.MaxTailMessages"/>
+    /// pinning step and by callers that re-inject corrections into assembled contexts.
+    /// </summary>
+    public static bool IsCorrectionMessage(ChatMessage message)
+    {
+        // Only user-role messages qualify; a null Text is treated as empty.
+        if (message.Role != ChatRole.User)
+            return false;
+        var body = message.Text ?? string.Empty;
+        return body.Contains("[fuseraft:blocked", StringComparison.Ordinal)
+            || CorrectionPrefixes.Any(p => body.StartsWith(p, StringComparison.Ordinal));
+    }
+
     // Maximum number of characters to replay from a single non-summary assistant message.
     // Agents sometimes produce verbose stream-of-consciousness reasoning text (3–5k output
     // tokens). When that text is replayed verbatim in every subsequent turn it causes
diff --git a/src/Orchestration/HandoffContextResolver.cs b/src/Orchestration/HandoffContextResolver.cs
index d5c5602..db395c0 100644
--- a/src/Orchestration/HandoffContextResolver.cs
+++ b/src/Orchestration/HandoffContextResolver.cs
@@ -167,6 +167,14 @@ public async Task<IReadOnlyList<ChatMessage>> AssembleForAgentAsync(
             }
         }
 
+        // 4. Pending corrections — user correction messages injected into shared history after
+        // this agent's last turn. Context-spec agents replace shared-history replay entirely,
+        // so corrections written to shared history (by CorrectionEngine, routing strategies,
+        // or the verifier hook) would otherwise be invisible on the next invocation. Re-inject
+        // them here so the agent always sees the most recent feedback addressed to it.
+        var pendingCorrections = ExtractPendingCorrections(agentName, sharedHistory);
+        result.AddRange(pendingCorrections);
+
         return result;
     }
 
@@ -314,6 +322,35 @@ private static IReadOnlyList<ChatMessage> ExtractOwnHistory(
         return ownTurns.Select(t => t.Msg).ToList();
     }
 
+    // ── Pending-correction extraction ───────────────────────────────────────
+
+    // Returns all correction messages in shared history that appear after the last
+    // assistant turn by agentName. These are unread corrections the agent has not yet
+    // acted on; they must be included in the assembled context so the agent sees them.
+    private static IReadOnlyList<ChatMessage> ExtractPendingCorrections(
+        string agentName,
+        IList<ChatMessage> history)
+    {
+        // Walk backwards to the agent's most recent assistant turn; the cursor ends at -1
+        // when it has none, in which case the scan below starts at the first message.
+        int cursor = history.Count - 1;
+        while (cursor >= 0)
+        {
+            var candidate = history[cursor];
+            bool isOwnTurn = candidate.Role == ChatRole.Assistant
+                && string.Equals(candidate.AuthorName, agentName, StringComparison.OrdinalIgnoreCase);
+            if (isOwnTurn)
+                break;
+            cursor--;
+        }
+
+        // Of everything after that turn, keep only the correction messages, in order.
+        return history
+            .Skip(cursor + 1)
+            .Where(ContextWindowFilter.IsCorrectionMessage)
+            .ToList();
+    }
+
     // ── Helpers ──────────────────────────────────────────────────────────────
 
     private static (string Type, string? Param) ParseSource(string source)

From fcadecd198f99f6b22d2a472970a794b61ff0e0b Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 20:43:29 -0500
Subject: [PATCH 3/7] feat(orchestration): per-tool result caps and tool call
 compaction

- Global MaxToolResultChars cut search/grep at the same limit as file
  reads; ToolResultCharOverrides lets callers raise the cap per tool
  without relaxing the global default
- ToolCallRecord entries were silently lost for compacted turns;
  AccumulateCompactedToolCalls now wires them into all five summary
  paths so telemetry and BuildModifiedFilesNote stay accurate
---
 src/Core/Models/ContextWindowConfig.cs     | 19 ++++++++++
 src/Orchestration/ContextWindowFilter.cs   | 42 +++++++++++++++++++---
 src/Orchestration/ConversationCompactor.cs | 38 +++++++++++++++++---
 3 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/src/Core/Models/ContextWindowConfig.cs b/src/Core/Models/ContextWindowConfig.cs
index e6154ca..f262f44 100644
--- a/src/Core/Models/ContextWindowConfig.cs
+++ b/src/Core/Models/ContextWindowConfig.cs
@@ -105,4 +105,23 @@ public sealed record ContextWindowConfig
     /// Default: <c>0</c> (no truncation).
     /// </summary>
     public int MaxToolResultChars { get; init; }
+
+    /// <summary>
+    /// Per-tool-name character limit overrides applied during tool result truncation.
+    /// When a key matches a tool function name (case-insensitive), its value is used as the
+    /// character cap for that tool's results instead of <see cref="MaxToolResultChars"/>.
+    ///
+    /// <para>
+    /// The primary use case is giving search and grep tools a higher limit than file-read
+    /// tools. For example:
+    /// <code>
+    /// "ToolResultCharOverrides": { "search_content": 20000, "grep_file": 20000 }
+    /// </code>
+    /// A value of <c>0</c> or less disables truncation for that tool entirely.
+    /// </para>
+    ///
+    /// Ignored unless <see cref="MaxToolResultChars"/> is greater than zero.
+    /// Default: empty (no overrides).
+    /// </summary>
+    public Dictionary<string, int> ToolResultCharOverrides { get; init; } = [];
 }
diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs
index 0643319..266db0e 100644
--- a/src/Orchestration/ContextWindowFilter.cs
+++ b/src/Orchestration/ContextWindowFilter.cs
@@ -164,7 +164,7 @@ public static IReadOnlyList<ChatMessage> Apply(
         // When MaxToolResultChars is set, any FunctionResultContent string that exceeds
         // the limit is truncated and annotated with the omitted character count.
         if (window.MaxToolResultChars > 0)
-            list = TruncateToolResults(list, window.MaxToolResultChars);
+            list = TruncateToolResults(list, window.MaxToolResultChars, window.ToolResultCharOverrides);
 
         return list;
     }
@@ -174,7 +174,10 @@ public static IReadOnlyList<ChatMessage> Apply(
     // The rest is elided — the model's mental model of the file is stale at that point anyway.
     private const int ConsumedReadCapChars = 500;
 
-    private static List<ChatMessage> TruncateToolResults(List<ChatMessage> list, int maxChars)
+    private static List<ChatMessage> TruncateToolResults(
+        List<ChatMessage> list,
+        int maxChars,
+        IReadOnlyDictionary<string, int>? overrides = null)
     {
         // Fast path: no ChatRole.Tool messages in the slice.
         if (!list.Any(m => m.Role == ChatRole.Tool)) return list;
@@ -184,6 +187,16 @@ private static List<ChatMessage> TruncateToolResults(List<ChatMessage> list, int
         // the model hasn't yet acted on are left at the normal maxChars limit.
         var consumedReadIds = BuildConsumedReadCallIds(list);
 
+        // Build callId → toolName so per-tool overrides can be resolved for each result.
+        var callToolNames = new Dictionary<string, string>(StringComparer.Ordinal);
+        foreach (var msg in list)
+        {
+            if (msg.Role != ChatRole.Assistant) continue;
+            foreach (var c in msg.Contents)
+                if (c is FunctionCallContent fc && fc.CallId is not null)
+                    callToolNames[fc.CallId] = fc.Name ?? string.Empty;
+        }
+
         var result = new List<ChatMessage>(list.Count);
         foreach (var msg in list)
         {
@@ -211,10 +224,29 @@ private static List<ChatMessage> TruncateToolResults(List<ChatMessage> list, int
                             $"file was written or patched later this session; " +
                             $"call read_file again if current content is needed]";
                     }
-                    else if (s.Length > maxChars)
+                    else
                     {
-                        truncated = s[..maxChars] +
-                            $"\n[...truncated — {s.Length - maxChars:N0} chars omitted to reduce context size...]";
+                        // Resolve the per-tool limit: check overrides first, then fall back to maxChars.
+                        // A zero override value disables truncation for that tool entirely.
+                        int limit = maxChars;
+                        if (overrides is { Count: > 0 } &&
+                            callToolNames.TryGetValue(fr.CallId ?? string.Empty, out var toolName))
+                        {
+                            foreach (var kv in overrides)
+                            {
+                                if (string.Equals(kv.Key, toolName, StringComparison.OrdinalIgnoreCase))
+                                {
+                                    limit = kv.Value;
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (limit > 0 && s.Length > limit)
+                        {
+                            truncated = s[..limit] +
+                                $"\n[...truncated — {s.Length - limit:N0} chars omitted to reduce context size...]";
+                        }
                     }
 
                     if (truncated is not null)
diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs
index 86713a5..555685f 100644
--- a/src/Orchestration/ConversationCompactor.cs
+++ b/src/Orchestration/ConversationCompactor.cs
@@ -167,7 +167,11 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                     toCompact[0].TurnIndex, toCompact[^1].TurnIndex, cancellationToken);
                 var intentSummary = BuildIntentDerivedSummary(
                     toCompact[0].TurnIndex, toCompact[^1].TurnIndex, intents, prefixBlock);
-                intentSummary = intentSummary with { Usage = AccumulateCompactedUsage(toCompact, null) };
+                intentSummary = intentSummary with
+                {
+                    Usage     = AccumulateCompactedUsage(toCompact, null),
+                    ToolCalls = AccumulateCompactedToolCalls(toCompact),
+                };
                 logger.LogInformation(
                     "Intent compaction: {Compacted} turns replaced by intent log reconstruction ({IntentCount} intents).",
                     toCompact.Count, intents.Count);
@@ -191,7 +195,11 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 };
             if (ExpandedNote is not null)
                 reconstructed = reconstructed with { Content = reconstructed.Content + "\n\n---\n" + ExpandedNote };
-            reconstructed = reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) };
+            reconstructed = reconstructed with
+            {
+                Usage     = AccumulateCompactedUsage(toCompact, null),
+                ToolCalls = AccumulateCompactedToolCalls(toCompact),
+            };
             logger.LogInformation(
                 "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.",
                 toCompact.Count);
@@ -222,7 +230,8 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                     Role                = "user",
                     TurnIndex           = toCompact[^1].TurnIndex,
                     IsCompactionSummary = true,
-                    Usage               = AccumulateCompactedUsage(toCompact, summUsage)
+                    Usage               = AccumulateCompactedUsage(toCompact, summUsage),
+                    ToolCalls           = AccumulateCompactedToolCalls(toCompact),
                 };
 
                 logger.LogInformation(
@@ -261,7 +270,8 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 Role                = "user",
                 TurnIndex           = toCompact[^1].TurnIndex,
                 IsCompactionSummary = true,
-                Usage               = AccumulateCompactedUsage(toCompact, summaryUsage)
+                Usage               = AccumulateCompactedUsage(toCompact, summaryUsage),
+                ToolCalls           = AccumulateCompactedToolCalls(toCompact),
             };
 
             logger.LogInformation(
@@ -276,12 +286,30 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
             logger.LogError(ex,
                 "LLM compaction failed; inserting fallback marker for turns {First}–{Last}.",
                 toCompact[0].TurnIndex, toCompact[^1].TurnIndex);
-            return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message), toRetain);
+            return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message)
+                with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }, toRetain);
         }
     }
 
     // Internals
 
+    // Collects all ToolCallRecord entries from the compacted turns into a flat list so the
+    // summary message preserves them. Downstream consumers (telemetry, BuildModifiedFilesNote)
+    // inspect ToolCalls on AgentMessages; without this they silently drop records for any turn
+    // that was compacted, producing incomplete data for succeeded/failed tool tracking.
+    private static IReadOnlyList<ToolCallRecord>? AccumulateCompactedToolCalls(
+        IReadOnlyList<AgentMessage> compacted)
+    {
+        // Concatenate the non-empty ToolCalls lists in turn order. Turns whose ToolCalls is
+        // null or empty are skipped. The result is null, not an empty list, when none of
+        // the compacted turns recorded a tool call.
+        var merged = compacted
+            .Where(turn => turn.ToolCalls is { Count: > 0 })
+            .SelectMany(turn => turn.ToolCalls!)
+            .ToList();
+        return merged.Count > 0 ? merged : null;
+    }
+
     // Sums the token costs of all compacted turns and folds in the summary-call cost.
     // The total is stored on the summary AgentMessage so AgentOrchestrator can seed
     // cumulativeTokens correctly on the next StreamAsync call (after resume/compaction),

From 83fda6ac9e7a42538ab30fdbcabcc3b7eda16218 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 20:53:12 -0500
Subject: [PATCH 4/7] fix(orchestration): budget guard, anti-thrash, sub-agent
 stagnation

- AgentOrchestrator: add EstimateContextTokens helper and a pre-turn budget
  guard that aborts before agent.RunAsync when cumulativeTokens + estimated
  input tokens > MaxTotalTokens, preventing expensive one-turn overshoots.

- CompactionConfig: raise AntiThrashWindow default from 3 to 10 so a single
  productive compaction no longer resets the guard in long sessions.

- CorrectionEngine / GraphOrchestrator: add optional turnToolCalls parameter
  to InjectNoKeywordCorrection; the no-tool-calls gate also checks
  AgentMessage.ToolCalls so SubAgentPlugin responses are not misclassified
  as stagnation.
---
 src/Core/Models/CompactionConfig.cs           |  4 +--
 src/Orchestration/AgentOrchestrator.cs        | 35 +++++++++++++++++++
 src/Orchestration/GraphOrchestrator.cs        |  6 ++--
 .../Workflow/CorrectionEngine.cs              |  9 +++--
 4 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/src/Core/Models/CompactionConfig.cs b/src/Core/Models/CompactionConfig.cs
index 508a791..f0cdd20 100644
--- a/src/Core/Models/CompactionConfig.cs
+++ b/src/Core/Models/CompactionConfig.cs
@@ -111,7 +111,7 @@ public record CompactionConfig
     /// <summary>
     /// Number of recent compaction outcomes to examine for the anti-thrash guard.
     /// Only suppresses compaction once this many outcomes have been recorded.
-    /// Default: <c>3</c>. Set to <c>0</c> to disable the anti-thrash check.
+    /// Default: <c>10</c>. Set to <c>0</c> to disable the anti-thrash check.
     /// </summary>
-    public int AntiThrashWindow { get; init; } = 3;
+    public int AntiThrashWindow { get; init; } = 10;
 }
diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs
index 9d639b1..613c3f4 100644
--- a/src/Orchestration/AgentOrchestrator.cs
+++ b/src/Orchestration/AgentOrchestrator.cs
@@ -475,6 +475,22 @@ await eventEmitter.EmitAsync("turn_end",
                 history.Count,
                 filtered.Count);
 
+            // Pre-turn budget guard: estimate the input token cost of this context slice and
+            // abort before the LLM call if cumulative + estimated input would exceed the limit.
+            // Prevents the one-turn overshoot that occurs when the post-yield check fires too
+            // late (e.g. a file-read turn that consumes tens of thousands of tokens).
+            if (config.MaxTotalTokens is { } preTurnLimit)
+            {
+                var estimatedInputTokens = EstimateContextTokens(context);
+                if (cumulativeTokens + estimatedInputTokens > preTurnLimit)
+                {
+                    logger.LogWarning(
+                        "[Orchestrator] Pre-turn budget guard: cumulative {Cumulative:N0} + estimated input {Estimated:N0} > limit {Limit:N0} — aborting before turn.",
+                        cumulativeTokens, estimatedInputTokens, preTurnLimit);
+                    throw new BudgetExceededException(cumulativeTokens + estimatedInputTokens, preTurnLimit);
+                }
+            }
+
             AgentResponse response = governanceKernel?.CircuitBreaker is { } cb
                 ? await cb.ExecuteAsync(() => agent.RunAsync(context, null, null, cancellationToken))
                 : await agent.RunAsync(context, null, null, cancellationToken);
@@ -769,4 +785,23 @@ private static void WireDidResolver(ITerminationCondition condition, Func<string
     }
 
     private static string GenerateSessionId() => Guid.NewGuid().ToString("N")[..8];
+
+    // Estimates the input token cost of a context slice by summing all content chars across
+    // message types and dividing by 4. Used for the pre-turn budget guard; intentionally
+    // conservative (actual tokenisation may differ but is rarely smaller than chars/4).
+    private static int EstimateContextTokens(IEnumerable<ChatMessage> messages)
+    {
+        int chars = 0;
+        foreach (var msg in messages)
+            foreach (var content in msg.Contents)
+                chars += content switch
+                {
+                    TextContent tc        => tc.Text?.Length ?? 0,
+                    FunctionCallContent fc => (fc.Name?.Length ?? 0) +
+                        (fc.Arguments?.Values.Sum(v => v?.ToString()?.Length ?? 0) ?? 0),
+                    FunctionResultContent fr => fr.Result?.ToString()?.Length ?? 0,
+                    _ => 0,
+                };
+        return chars / 4;
+    }
 }
diff --git a/src/Orchestration/GraphOrchestrator.cs b/src/Orchestration/GraphOrchestrator.cs
index 028a5c3..abd5a8c 100644
--- a/src/Orchestration/GraphOrchestrator.cs
+++ b/src/Orchestration/GraphOrchestrator.cs
@@ -1165,7 +1165,8 @@ await eventEmitter.EmitAsync("no_keyword",
 
             int histBefore2 = ctx.History.Count;
             await CorrectionEngine.InjectNoKeywordCorrection(
-                ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter);
+                ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter,
+                agentMsg.ToolCalls);
             await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false);
 
             if (consecutiveFails >= maxRetries)
@@ -1623,7 +1624,8 @@ await eventEmitter.EmitAsync("no_keyword",
 
             int histBefore2 = ctx.History.Count;
             await CorrectionEngine.InjectNoKeywordCorrection(
-                ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter);
+                ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter,
+                agentMsg.ToolCalls);
             await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false);
 
             if (consecutiveFails >= maxRetries)
diff --git a/src/Orchestration/Workflow/CorrectionEngine.cs b/src/Orchestration/Workflow/CorrectionEngine.cs
index 2291fe6..61ac8ff 100644
--- a/src/Orchestration/Workflow/CorrectionEngine.cs
+++ b/src/Orchestration/Workflow/CorrectionEngine.cs
@@ -1,5 +1,6 @@
 using Microsoft.Extensions.AI;
 using fuseraft.Core;
+using fuseraft.Core.Models;
 
 namespace fuseraft.Orchestration.Workflow;
 
@@ -43,7 +44,8 @@ internal static async Task InjectNoKeywordCorrection(
         string agentName,
         int consecutiveCount,
         AgentRouteTable routeTable,
-        EventEmitter? eventEmitter = null)
+        EventEmitter? eventEmitter = null,
+        IReadOnlyList<ToolCallRecord>? turnToolCalls = null)
     {
         var validKeywordList = BuildValidKeywordList(routeTable);
         bool isReviewerType  = routeTable.PhaseBreakKeywords.Contains("APPROVED");
@@ -51,7 +53,10 @@ internal static async Task InjectNoKeywordCorrection(
         if (TryInjectForeignKeywordCorrection(history, responseText, routeTable, agentName, validKeywordList)) return;
         if (TryInjectCodeBlockCorrection(history, responseText, isReviewerType, validKeywordList)) return;
 
-        if (!CurrentTurnHasToolCalls(history))
+        // Also treat as "has tool calls" when the AgentMessage records sub-agent tool calls
+        // that ran inside a SubAgentPlugin — those don't produce ChatRole.Tool entries in the
+        // outer history so CurrentTurnHasToolCalls would return false without this check.
+        if (!CurrentTurnHasToolCalls(history) && (turnToolCalls is null || turnToolCalls.Count == 0))
         {
             InjectNoToolCallsCorrection(history, isReviewerType, validKeywordList);
             return;

From 245d22d59ec830612c4f5400071558f4e938651a Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 21:16:41 -0500
Subject: [PATCH 5/7] fix(orchestration): intent fallback, sandbox writes,
 evidence verifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- intent mode silently downgraded when intentLog is null; agents resuming
  after compaction had no signal the summary was degraded — now prepends a
  visible COMPACTION WARNING block and emits a startup LogWarning when
  Compaction.Mode=intent but no IntentLogPath is configured
- patch_file / create_directory / delete_directory / set_permissions /
  copy_file / move_file bypassed sandbox boundary check when no
  FileSystemPermissions globs were configured; added SandboxedExtendedWriteFunctions
  so these always route through InspectFileSystem regardless of glob state
- verifier post-turn block only fired on EveryNTurns; TriggerOnSuspiciousTransition
  had no effect outside StateMachineSelectionStrategy — wired
  HasSuspiciousTransitionSignal to detect ConflictingEvidence/NoProgress
  corrections injected by SelectAsync and trigger the verifier immediately;
  pins EVIDENCE INCONSISTENCY / EVIDENCE AUDIT REQUIRED / MISSING ARTIFACT
  in CorrectionPrefixes so they survive MaxTailMessages trim
- expose ReadSessionContextAsync on ContextAssembler and auto-inject session
  context summary for agents without an explicit Context spec
- default IncludeReasoning and IncludeSymbolGraph to true; add MaxReplayChars
  to ContextWindowConfig with per-agent TruncateAssistantContent step in Apply
---
 src/Cli/OrchestratorBuilder.cs                |  9 +++
 src/Core/Models/CompactionConfig.cs           | 12 ++--
 src/Core/Models/ContextWindowConfig.cs        | 15 +++++
 .../Plugins/SandboxEnforcementFilter.cs       | 15 ++++-
 src/Orchestration/AgentOrchestrator.cs        | 59 ++++++++++++++++---
 src/Orchestration/ContextWindowFilter.cs      | 56 +++++++++++++++---
 src/Orchestration/ConversationCompactor.cs    | 22 +++++--
 src/Orchestration/HandoffContextResolver.cs   |  8 +++
 8 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs
index ee3c345..e164e9f 100644
--- a/src/Cli/OrchestratorBuilder.cs
+++ b/src/Cli/OrchestratorBuilder.cs
@@ -713,6 +713,15 @@ t.Pattern    is not null ||
                 chatClientFactory.Create(summaryModel), compactionConfig,
                 loggerFactory.CreateLogger<ConversationCompactor>(),
                 resumptionNote, changeLogPath, intentLog, config.Events?.Path, evidenceStore);
+
+            if ((compactionConfig.Mode ?? string.Empty).Equals("intent", StringComparison.OrdinalIgnoreCase)
+                && intentLog is null)
+            {
+                loggerFactory.CreateLogger(nameof(OrchestratorBuilder)).LogWarning(
+                    "Compaction.Mode is 'intent' but no ChangeTracking.IntentLogPath is configured — " +
+                    "compaction will fall back to lossless or LLM mode at runtime. " +
+                    "Set ChangeTracking.IntentLogPath to enable deterministic intent compaction.");
+            }
         }
 
         // Build the post-session skill curator when curation is enabled.
diff --git a/src/Core/Models/CompactionConfig.cs b/src/Core/Models/CompactionConfig.cs
index f0cdd20..d6e4a96 100644
--- a/src/Core/Models/CompactionConfig.cs
+++ b/src/Core/Models/CompactionConfig.cs
@@ -67,19 +67,21 @@ public record CompactionConfig
     /// When <c>true</c>, reasoning excerpts from the compacted turn range are prepended to
     /// the compaction summary. Each excerpt is truncated to approximately 500 tokens so agents
     /// resuming after compaction can see the WHY behind prior decisions, not just the artifacts.
-    /// Reads <c>reasoning</c> events from the session's events log. Default: <c>false</c>.
+    /// Reads <c>reasoning</c> events from the session's events log. When the events log is
+    /// absent or contains no reasoning events the block is omitted silently.
+    /// Default: <c>true</c>.
     /// </summary>
-    public bool IncludeReasoning { get; init; } = false;
+    public bool IncludeReasoning { get; init; } = true;
 
     /// <summary>
     /// When <c>true</c>, a symbol dependency graph derived from the session's changed files is
     /// prepended to the compaction summary (before reasoning excerpts when both are enabled).
     /// Queries <c>SymbolDefinition</c> and <c>SymbolReference</c> nodes from the evidence store
     /// for every file written during the session, giving agents an explicit map of what symbols
-    /// were in scope across the compacted turns. Requires an active <c>EvidenceStore</c>.
-    /// Default: <c>false</c>.
+    /// were in scope across the compacted turns. When no evidence store is wired or no symbol
+    /// nodes are found the block is omitted silently. Default: <c>true</c>.
     /// </summary>
-    public bool IncludeSymbolGraph { get; init; } = false;
+    public bool IncludeSymbolGraph { get; init; } = true;
 
     /// <summary>
     /// Optional custom prompt template for LLM-mode compaction. When set, replaces the
diff --git a/src/Core/Models/ContextWindowConfig.cs b/src/Core/Models/ContextWindowConfig.cs
index f262f44..dcb5874 100644
--- a/src/Core/Models/ContextWindowConfig.cs
+++ b/src/Core/Models/ContextWindowConfig.cs
@@ -106,6 +106,21 @@ public sealed record ContextWindowConfig
     /// </summary>
     public int MaxToolResultChars { get; init; }
 
+    /// <summary>
+    /// Maximum characters to replay from a single non-summary assistant message in the
+    /// history slice passed to this agent. When an assistant message text exceeds this limit
+    /// the content is truncated and annotated with the omitted character count.
+    ///
+    /// <para>
+    /// Agents sometimes produce multi-thousand-character reasoning blocks that are replayed
+    /// verbatim on every subsequent turn, compounding input-token growth. Compaction-summary
+    /// messages are never truncated regardless of this setting.
+    /// </para>
+    ///
+    /// Default: <c>0</c> (this step is skipped; only the global 2,000-char session-replay fallback applies).
+    /// </summary>
+    public int MaxReplayChars { get; init; }
+
     /// <summary>
     /// Per-tool-name character limit overrides applied during tool result truncation.
     /// When a key matches a tool function name (case-insensitive), its value is used as the
diff --git a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs
index ef279a2..8da4ab1 100644
--- a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs
+++ b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs
@@ -74,6 +74,15 @@ public sealed class SandboxEnforcementFilter
     private static readonly string[] FileSystemFunctions =
         ["read_file", "write_file", "delete_file", "list_files"];
 
+    // Write-type extended functions that must always be routed through InspectFileSystem for
+    // sandbox boundary checks, even when no FileSystemPermissions glob matchers are configured.
+    // These functions create, modify, or remove paths and must stay within the sandbox root.
+    private static readonly HashSet<string> SandboxedExtendedWriteFunctions = new(StringComparer.OrdinalIgnoreCase)
+    {
+        "patch_file", "create_directory", "delete_directory", "set_permissions",
+        "copy_file", "move_file",
+    };
+
     private static readonly string[] ShellFunctions =
         ["shell_run", "shell_run_script"];
 
@@ -205,11 +214,13 @@ public AIAgent WrapAgent(AIAgent agent) =>
         var ringDenial = InspectRing(functionName);
         if (ringDenial is not null) return ringDenial;
 
-        // Core FS functions are always sandboxed; extended functions are routed when any glob
-        // matcher is configured so they get sandbox + deny/read/write checks.
+        // Core FS functions are always sandboxed; write-type extended functions are also
+        // always sandboxed (boundary check only). Other extended functions are routed when
+        // any glob matcher is configured so they get sandbox + deny/read/write checks.
         bool hasGlobMatcher = _fsDenyMatcher is not null || _fsReadMatcher is not null || _fsWriteMatcher is not null;
         bool isFsFunction = FileSystemFunctions.Any(f =>
                 string.Equals(f, functionName, StringComparison.OrdinalIgnoreCase))
+            || SandboxedExtendedWriteFunctions.Contains(functionName)
             || (hasGlobMatcher && AllExtendedFsFunctions.Contains(functionName));
 
         if (isFsFunction)
diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs
index 613c3f4..46216c7 100644
--- a/src/Orchestration/AgentOrchestrator.cs
+++ b/src/Orchestration/AgentOrchestrator.cs
@@ -419,7 +419,11 @@ await eventEmitter.EmitAsync("turn_end",
             }
 
             // Select the next agent.
+            // Capture the history count before selection so correction messages injected by
+            // the strategy (ConflictingEvidence / NoProgress) can be identified afterwards.
+            int preSelectCount = history.Count;
             var agent = await selection.SelectAsync(agents, history, cancellationToken);
+            int postSelectCount = history.Count;
             if (agent is null) break;
 
             logger.LogDebug(
@@ -445,7 +449,9 @@ await eventEmitter.EmitAsync("turn_end",
             // sees only what it needs rather than the full session transcript. The shared
             // history list is still updated after the turn so routing/termination strategies
             // continue to work normally.
-            // When no Context spec is set, fall back to the traditional ContextWindow filter.
+            // When no Context spec is set, fall back to the traditional ContextWindow filter
+            // and auto-inject the session context summary (context_summary.md) as the second
+            // message when it exists, preventing agents from wasting turns re-reading brief.json.
             var agentCfg = agentConfigs.GetValueOrDefault(agent.Name ?? "");
             IReadOnlyList<ChatMessage> filtered;
             if (agentCfg?.Context is { Count: > 0 } agentContextSources && contextAssembler is not null)
@@ -459,7 +465,22 @@ await eventEmitter.EmitAsync("turn_end",
             }
             else
             {
-                filtered = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow);
+                var raw = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow);
+                if (contextAssembler is not null)
+                {
+                    var sessionCtx = await contextAssembler.ReadSessionContextAsync(cancellationToken);
+                    if (sessionCtx is not null)
+                    {
+                        var withCtx = new List<ChatMessage>(raw.Count + 1);
+                        if (raw.Count > 0) withCtx.Add(raw[0]);
+                        withCtx.Add(new ChatMessage(ChatRole.User,
+                            $"[Session Context]\n\n{sessionCtx.Trim()}"));
+                        withCtx.AddRange(raw.Skip(1));
+                        filtered = withCtx;
+                    }
+                    else filtered = raw;
+                }
+                else filtered = raw;
             }
 
             IEnumerable<ChatMessage> context = (hasInstructions || memoryManager is not null) && instructions is not null
@@ -597,13 +618,16 @@ await eventEmitter.EmitAsync("reasoning",
             if (memoryManager is not null)
                 await memoryManager.PostTurnAsync(agentMessage.AgentName, [..history], cancellationToken);
 
-            // Periodic verifier: run the meta-agent every N turns to audit evidence.
-            // Skipped when the verifier itself just ran to prevent self-loops.
-            if (config.Verifier is { EveryNTurns: > 0 } verCfg
+            // Periodic verifier: run the meta-agent every N turns to audit evidence, OR
+            // immediately when a ConflictingEvidence / NoProgress correction was injected this
+            // turn (evidence-driven trigger). Skipped when the verifier itself just ran.
+            if (config.Verifier is { } verCfg
                 && verifierAgent is not null
-                && agentMessage.TurnIndex > 0
-                && agentMessage.TurnIndex % verCfg.EveryNTurns == 0
-                && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase))
+                && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase)
+                && (
+                    (verCfg.EveryNTurns > 0 && agentMessage.TurnIndex > 0 && agentMessage.TurnIndex % verCfg.EveryNTurns == 0)
+                    || (verCfg.TriggerOnSuspiciousTransition && HasSuspiciousTransitionSignal(history, preSelectCount, postSelectCount))
+                ))
             {
                 AgentStarting?.Invoke(verifierAgent.Name ?? "Verifier");
                 agentFactory.OnAgentTurnStarting();
@@ -786,6 +810,25 @@ private static void WireDidResolver(ITerminationCondition condition, Func<string
 
     private static string GenerateSessionId() => Guid.NewGuid().ToString("N")[..8];
 
+    // Scans messages at indices [from, to) for ConflictingEvidence or NoProgress correction
+    // signals injected by the selection strategy. Returns true when any such signal is found,
+    // indicating the verifier should audit the current turn's output.
+    private static bool HasSuspiciousTransitionSignal(IList<ChatMessage> history, int from, int to)
+    {
+        for (int i = from; i < to && i < history.Count; i++)
+        {
+            var msg = history[i];
+            if (msg.Role != ChatRole.User) continue;
+            var text = msg.Text ?? string.Empty;
+            if (text.StartsWith("NO TOOL CALLS",         StringComparison.Ordinal) ||
+                text.StartsWith("CRITICAL:",              StringComparison.Ordinal) ||
+                text.Contains("EVIDENCE INCONSISTENCY",  StringComparison.Ordinal) ||
+                text.Contains("EVIDENCE AUDIT REQUIRED", StringComparison.Ordinal))
+                return true;
+        }
+        return false;
+    }
+
     // Estimates the input token cost of a context slice by summing all content chars across
     // message types and dividing by 4. Used for the pre-turn budget guard; intentionally
     // conservative (actual tokenisation may differ but is rarely smaller than chars/4).
diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs
index 266db0e..cdc8cfd 100644
--- a/src/Orchestration/ContextWindowFilter.cs
+++ b/src/Orchestration/ContextWindowFilter.cs
@@ -166,9 +166,48 @@ public static IReadOnlyList<ChatMessage> Apply(
         if (window.MaxToolResultChars > 0)
             list = TruncateToolResults(list, window.MaxToolResultChars, window.ToolResultCharOverrides);
 
+        // Step 7: Truncate verbose assistant messages.
+        // When MaxReplayChars is set, assistant text content that exceeds the limit is
+        // truncated. Compaction-summary messages (marked by their header prefix) are exempt.
+        if (window.MaxReplayChars > 0)
+            list = TruncateAssistantContent(list, window.MaxReplayChars);
+
         return list;
     }
 
+    // Returns a new list in which each over-long assistant message is replaced by a copy whose
+    // text is cut to maxChars plus an omitted-count note; all other messages pass through as-is.
+    private static List<ChatMessage> TruncateAssistantContent(List<ChatMessage> list, int maxChars)
+    {
+        var result = new List<ChatMessage>(list.Count);
+        foreach (var msg in list)
+        {
+            if (msg.Role != ChatRole.Assistant)
+            {
+                result.Add(msg);
+                continue;
+            }
+            var textContent = string.Concat(msg.Contents.OfType<TextContent>().Select(t => t.Text));
+            // Compaction summaries are already compact — skip them unconditionally.
+            if (textContent.StartsWith("[CONVERSATION SUMMARY", StringComparison.Ordinal) ||
+                textContent.Length <= maxChars)
+            {
+                result.Add(msg);
+                continue;
+            }
+
+            // Back off one char rather than cut between the two halves of a surrogate pair.
+            var keep = maxChars > 0 && char.IsHighSurrogate(textContent[maxChars - 1]) ? maxChars - 1 : maxChars;
+            var truncated = textContent[..keep] +
+                $"\n[...truncated — {textContent.Length - keep:N0} chars omitted to reduce context size...]";
+            // Shallow-clone so message metadata other than Contents carries over unchanged.
+            var copy = msg.Clone();
+            copy.Contents = [new TextContent(truncated), .. msg.Contents.Where(c => c is not TextContent)];
+            result.Add(copy);
+        }
+        return result;
+    }
+
     // How much of a consumed read_file result to keep for structural context (file shape,
     // imports, class header) after a downstream write/patch confirms the content was acted on.
     // The rest is elided — the model's mental model of the file is stale at that point anyway.
@@ -421,6 +460,9 @@ private static List<ChatMessage> SanitizeToolPairs(List<ChatMessage> list)
         "VERIFICATION FINDING",
         "Files written this turn",
         "No handoff keyword",
+        "EVIDENCE INCONSISTENCY",   // ConflictingEvidence (KeywordSelectionStrategy)
+        "EVIDENCE AUDIT REQUIRED",  // ConflictingEvidence (StateMachineSelectionStrategy)
+        "MISSING ARTIFACT",         // MissingEvidence (both strategies)
     ];
 
     /// <summary>
@@ -440,28 +482,28 @@ public static bool IsCorrectionMessage(ChatMessage message)
         return false;
     }
 
-    // Maximum number of characters to replay from a single non-summary assistant message.
+    // Global default applied during checkpoint-resume replay when no per-agent limit is set.
     // Agents sometimes produce verbose stream-of-consciousness reasoning text (3–5k output
     // tokens). When that text is replayed verbatim in every subsequent turn it causes
     // compaction summaries to grow each cycle and in-turn input tokens to balloon (450k+).
     // Compaction summaries (IsCompactionSummary) are already compact and are never truncated.
-    private const int MaxReplayChars = 2_000;
+    internal const int DefaultMaxReplayChars = 2_000;
 
     /// <summary>
     /// Returns the content string to replay for <paramref name="message"/> into the next
     /// <c>StreamAsync</c> call's history. Verbose non-summary assistant messages are
-    /// truncated at <see cref="MaxReplayChars"/> to prevent compounding context growth.
+    /// truncated at <paramref name="maxReplayChars"/> to prevent compounding context growth.
     /// </summary>
-    public static string TruncateReplayContent(AgentMessage message)
+    public static string TruncateReplayContent(AgentMessage message, int maxReplayChars = DefaultMaxReplayChars)
     {
         var content = message.Content ?? string.Empty;
 
         if (message.IsCompactionSummary
             || message.Role != "assistant"
-            || content.Length <= MaxReplayChars)
+            || content.Length <= maxReplayChars)
             return content;
 
-        return content[..MaxReplayChars] +
-               $"\n[...truncated — {content.Length - MaxReplayChars:N0} chars omitted to reduce context size...]";
+        return content[..maxReplayChars] +
+               $"\n[...truncated — {content.Length - maxReplayChars:N0} chars omitted to reduce context size...]";
     }
 }
diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs
index 555685f..c2ce79a 100644
--- a/src/Orchestration/ConversationCompactor.cs
+++ b/src/Orchestration/ConversationCompactor.cs
@@ -159,6 +159,9 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
         var prefixBlock     = CombineBlocks(symbolBlock, reasoningBlock);
 
         // Intent mode: reconstruct from the intent log — fully deterministic, no LLM call.
+        // When the intent log is unavailable, record a visible fallback notice so agents
+        // resuming after compaction know the summary was degraded.
+        string? intentFallbackNotice = null;
         if (mode == "intent")
         {
             if (intentLog is not null)
@@ -179,7 +182,12 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
             }
 
             logger.LogWarning(
-                "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm.");
+                "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm. " +
+                "Configure ChangeTracking.IntentLogPath to enable deterministic intent compaction.");
+            intentFallbackNotice =
+                "[COMPACTION WARNING: 'intent' mode was requested but no intent log is wired — " +
+                "this summary was generated using fallback compaction (lossless or LLM). " +
+                "Configure ChangeTracking.IntentLogPath to suppress this warning.]";
             // Fall through to lossless / llm.
         }
 
@@ -203,7 +211,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
             logger.LogInformation(
                 "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.",
                 toCompact.Count);
-            return (reconstructed, toRetain);
+            return (PrependFallbackNotice(reconstructed, intentFallbackNotice), toRetain);
         }
 
         // Hybrid: prepend reconstruction before the LLM summary.
@@ -278,7 +286,7 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
                 "Compaction complete. Turns 0–{Last} replaced by summary.",
                 toCompact[^1].TurnIndex);
 
-            return (summary, toRetain);
+            return (PrependFallbackNotice(summary, intentFallbackNotice), toRetain);
         }
         catch (OperationCanceledException) { throw; }
         catch (Exception ex)
@@ -286,8 +294,9 @@ public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> mess
             logger.LogError(ex,
                 "LLM compaction failed; inserting fallback marker for turns {First}–{Last}.",
                 toCompact[0].TurnIndex, toCompact[^1].TurnIndex);
-            return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message)
-                with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }, toRetain);
+            var fallback = BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message)
+                with { ToolCalls = AccumulateCompactedToolCalls(toCompact) };
+            return (PrependFallbackNotice(fallback, intentFallbackNotice), toRetain);
         }
     }
 
@@ -528,6 +537,9 @@ private void RecordSavings(double ratio)
             _recentSavings.Dequeue();
     }
 
+    private static AgentMessage PrependFallbackNotice(AgentMessage msg, string? notice) =>
+        notice is null ? msg : msg with { Content = notice + "\n\n" + msg.Content };
+
     private AgentMessage BuildFallbackSummary(int firstTurn, int lastTurn, string errorMessage)
     {
         var content =
diff --git a/src/Orchestration/HandoffContextResolver.cs b/src/Orchestration/HandoffContextResolver.cs
index db395c0..b730bf9 100644
--- a/src/Orchestration/HandoffContextResolver.cs
+++ b/src/Orchestration/HandoffContextResolver.cs
@@ -54,6 +54,14 @@ public ContextAssembler(
 
     public void SetSessionId(string sessionId) => _sessionId = sessionId;
 
+    /// <summary>
+    /// Returns the current session context summary, or <c>null</c> when the file does not
+    /// exist or is empty. Used by orchestrators to auto-inject context for agents that do not
+    /// declare an explicit <c>Context</c> spec.
+    /// </summary>
+    public Task<string?> ReadSessionContextAsync(CancellationToken ct = default)
+        => ResolveSessionContextAsync(ct);
+
     // ── Handoff injection (state machine transitions) ────────────────────────
 
     /// <summary>

From 4f47483b0a1e978f00c61f1fa1c02b10d7446394 Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 21:21:35 -0500
Subject: [PATCH 6/7] docs: sync ContextWindow fields, compaction defaults,
 sandbox coverage

- IncludeReasoning and IncludeSymbolGraph now default to true; docs reflected
  the old false defaults, which would mislead users who expect opt-in behavior
- AntiThrashWindow default raised from 3 to 10 to prevent a single productive
  compaction from prematurely resetting the guard on long sessions
- MaxReplayChars and ToolResultCharOverrides are new ContextWindow fields;
  omitting them from the reference left the per-agent replay-truncation and
  per-tool cap-override knobs undiscoverable
- Security sandbox table was missing the extended write functions
  (patch_file, create_directory, etc.) whose sandbox enforcement was just fixed
  to fire even when no FileSystemPermissions globs are configured
---
 docs/configuration.md      |  9 ++++---
 docs/context-management.md | 54 +++++++++++++++++++++++++-------------
 docs/security.md           | 13 +++++----
 3 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 9b36f0c..cd09baf 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -309,6 +309,9 @@ Filters are applied in order: `TextOnly` / `ExcludeAgents` first, then `MaxTurnA
 | `MaxTurnAge` | int | `0` | Keep only messages from the last N agent turns (each turn ends at an assistant reply). Applied after `TextOnly`/`ExcludeAgents` and before `MaxTailMessages`. Semantic alternative to a raw message count — discards entire early-session phases rather than an arbitrary number of messages. `0` means no limit. |
 | `MaxTailMessages` | int | `0` | After the above filters, keep only the last N messages. `0` means no limit. |
 | `ContextCapFraction` | double | `0.0` | Soft-cap threshold expressed as a fraction of `MaxTailMessages` (e.g. `0.8` = 80%). When the filtered count exceeds this threshold a `context_cap_warning` event is emitted. Does not change trim behavior — use `MaxTailMessages` to hard-cap. `0.0` disables the warning. |
+| `MaxToolResultChars` | int | `0` | Truncate `FunctionResultContent` strings in the replayed history slice to this many characters. A suffix noting the omitted count is appended. `0` disables truncation. See [context-management — Tool-result truncation](context-management.md#tool-result-truncation-maxtoolresultchars). |
+| `ToolResultCharOverrides` | object | `{}` | Per-tool-name character cap overrides. Keys are tool function names (case-insensitive); values are the character limit for that tool's results, overriding `MaxToolResultChars`. A value of `0` disables truncation for that tool. Only meaningful when `MaxToolResultChars` is also set. |
+| `MaxReplayChars` | int | `0` | Truncate non-summary assistant messages in the replayed history to this many characters. `0` disables this per-agent step; the global 2,000-character fallback applied during checkpoint-resume replay still applies. Compaction summaries are never truncated. |
 
 **`TextOnly: true`** is the primary lever for context reduction. A Reviewer that independently re-reads files and re-runs commands gains nothing from hundreds of tool results produced by the Developer — stripping them can reduce input tokens by 90%+ in typical sessions.
 
@@ -721,11 +724,11 @@ Compaction:
 | `Model` | object | first agent's model | Model used for generating the summary (`llm` and `hybrid` modes only). |
 | `Mode` | string | `"llm"` | Compaction mode. See below. |
 | `TokenBudget` | int | `80000` | Estimated token budget for `window` mode. Oldest message pairs are dropped until the total estimated token count (characters ÷ 4) falls within this limit. Ignored by all other modes. |
-| `IncludeReasoning` | bool | `false` | When `true`, reasoning excerpts from the compacted turns are prepended to the summary as a `[REASONING EXCERPTS]` block. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. Reads `reasoning` events from the session events log (`Events.Path`). Has no effect when `Events` is not configured. |
-| `IncludeSymbolGraph` | bool | `false` | When `true`, a `[SYMBOL DEPENDENCY GRAPH]` block is prepended to the summary (before `[REASONING EXCERPTS]` when both are enabled). The block lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session, giving agents an explicit map of what symbols were in scope. Requires `EvidenceStore` and `ChangeTracking` to be configured. |
+| `IncludeReasoning` | bool | `true` | Prepends a `[REASONING EXCERPTS]` block to the compaction summary. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. Reads `reasoning` events from the session events log (`Events.Path`). The block is omitted silently when `Events` is not configured or the log contains no reasoning events. Set to `false` to suppress. |
+| `IncludeSymbolGraph` | bool | `true` | Prepends a `[SYMBOL DEPENDENCY GRAPH]` block to the summary (before `[REASONING EXCERPTS]` when both are enabled). Lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session. Requires `EvidenceStore` and `ChangeTracking` to be configured; the block is omitted silently when no evidence store is wired or no symbol nodes are found. Set to `false` to suppress. |
 | `MaxCharsPerHistoryMessage` | int | `8000` | Maximum characters to include from any single message when building the history text passed to the LLM summarizer. Messages that exceed this limit are truncated and annotated with a `[TRUNCATED]` marker; any tool calls recorded for that turn are appended as a compact one-line list so the summarizer still knows what happened. Set to `0` to disable truncation. |
 | `AntiThrashMinSavingsRatio` | float | `0.10` | Minimum savings ratio (0–1) a compaction must achieve to count as effective. If the last `AntiThrashWindow` compactions all saved less than this fraction of the conversation, `ShouldCompact` returns `false` until the history grows past the trigger again. Prevents repeated LLM calls that reduce size by less than 10%. Set to `0` to disable. |
-| `AntiThrashWindow` | int | `3` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. |
+| `AntiThrashWindow` | int | `10` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. |
 | `SummaryTemplate` | string | built-in | Custom Liquid-style template for the LLM summary prompt. Supports `{{$task}}`, `{{$turn_count}}`, `{{$change_log}}`, and `{{$history}}` substitutions. When omitted, the built-in structured template is used — see [Compaction summary template](#compaction-summary-template). |
 
 **Compaction modes**
diff --git a/docs/context-management.md b/docs/context-management.md
index fa3d380..9207274 100644
--- a/docs/context-management.md
+++ b/docs/context-management.md
@@ -165,6 +165,10 @@ Agents:
       MaxTailMessages: 40     # hard cap after the above filters
       ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages
       MaxToolResultChars: 8000  # truncate individual tool results in replayed history
+      ToolResultCharOverrides:  # raise the cap for specific tools
+        search_content: 20000
+        grep_file: 20000
+      MaxReplayChars: 4000    # truncate verbose assistant messages in replayed history
 ```
 
 ### TextOnly
@@ -201,13 +205,22 @@ Hard cap applied after the other filters. When the filtered list still exceeds t
 the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning`
 event as an early signal before the hard cap is reached.
 
-### Replay truncation
+### Replay truncation (`MaxReplayChars`)
 
 Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text
 is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input
-tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to
-2,000 characters when replaying them into the next turn's history. Compaction summaries are
-never truncated.
+tokens balloon. fuseraft truncates verbose non-summary assistant messages to 2,000 characters
+by default when replaying them; set `MaxReplayChars` to override this cap per agent.
+Compaction summaries are never truncated regardless of this setting.
+
+```yaml
+Agents:
+  - Name: Developer
+    ContextWindow:
+      MaxReplayChars: 4000   # truncate replayed assistant messages to 4 000 chars
+```
+
+Default: `0` (uses the global 2,000-character fallback).
 
 ### Tool-result truncation (`MaxToolResultChars`)
 
@@ -225,9 +238,12 @@ Agents:
   - Name: Developer
     ContextWindow:
       MaxToolResultChars: 8000   # truncate tool results in replayed history to 8 000 chars
+      ToolResultCharOverrides:   # per-tool overrides (search tools can afford a higher cap)
+        search_content: 20000
+        grep_file: 20000
 ```
 
-Default: `0` (no truncation).
+Default: `0` (no truncation). `ToolResultCharOverrides` is only meaningful when `MaxToolResultChars` is also set; a value of `0` in the overrides map disables truncation for that specific tool entirely.
 
 **Consumed-read optimisation:** fuseraft distinguishes between `read_file` results that
 the agent has already acted on and those that are still load-bearing:
@@ -396,23 +412,25 @@ Compaction:
 Two optional flags add structured context blocks before the LLM summary text. Both are
 prefixed in this order when both are enabled: symbol graph first, then reasoning excerpts.
 
-**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's
-thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why*
-behind prior decisions matters as much as the *what*. Requires `Events` to be configured
-(reasoning excerpts are read from the session events log).
+**`IncludeReasoning`** (default `true`) — prepends a `[REASONING EXCERPTS]` block containing
+the model's thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when
+the *why* behind prior decisions matters as much as the *what*. Requires `Events` to be
+configured (reasoning excerpts are read from the session events log). When the events log is
+absent or contains no reasoning events, the block is omitted silently.
 
-**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every
-`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during
-the session. Gives agents an explicit map of what symbols were in scope during the compacted
-turns. Requires `EvidenceStore` and `ChangeTracking` to be configured.
+**`IncludeSymbolGraph`** (default `true`) — prepends a `[SYMBOL DEPENDENCY GRAPH]` block
+listing every `SymbolDefinition` and `SymbolReference` node in the evidence store for files
+written during the session. Gives agents an explicit map of what symbols were in scope during
+the compacted turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. When no
+evidence store is wired or no symbol nodes are found, the block is omitted silently.
 
 ```yaml
 Compaction:
   TriggerTurnCount: 40
   KeepRecentTurns: 8
   Mode: hybrid
-  IncludeReasoning: true
-  IncludeSymbolGraph: true
+  IncludeReasoning: true    # default; set to false to suppress
+  IncludeSymbolGraph: true  # default; set to false to suppress
 ```
 
 ### History pre-pruning
@@ -441,7 +459,7 @@ If repeated compactions save very little — for example, a conversation that is
 threshold but whose LLM summary is nearly as long as the history it replaced — fuseraft
 suppresses further compaction until the history grows meaningfully.
 
-The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 3). If
+The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 10). If
 every entry in that window is below `AntiThrashMinSavingsRatio` (default 10%), `ShouldCompact`
 returns `false`. The guard resets automatically as new turns extend the conversation past the
 trigger again.
@@ -450,8 +468,8 @@ trigger again.
 Compaction:
   TriggerTurnCount: 20
   KeepRecentTurns: 5
-  AntiThrashMinSavingsRatio: 0.15   # suppress if saving less than 15%
-  AntiThrashWindow: 4               # look at last 4 compactions
+  AntiThrashMinSavingsRatio: 0.15   # suppress if saving less than 15% (default: 0.10)
+  AntiThrashWindow: 4               # look at last 4 compactions (default: 10)
 ```
 
 Set either field to `0` to disable the guard entirely.
diff --git a/docs/security.md b/docs/security.md
index 6f4c8fd..ad90013 100644
--- a/docs/security.md
+++ b/docs/security.md
@@ -15,13 +15,12 @@ Security:
 
 ### What is checked
 
-| Plugin | Argument | Check type |
-|--------|----------|-----------|
-| `FileSystem` | `path` | Hard deny if resolved path is outside sandbox |
-| `FileSystem` | `directory` | Hard deny if resolved path is outside sandbox |
-| `Shell` | `workingDirectory` | Hard deny if resolved path is outside sandbox |
-| `Shell` | `command` | Best-effort scan for absolute paths escaping sandbox |
-| `Shell` | `script` | Best-effort scan for absolute paths escaping sandbox |
+| Plugin | Functions / Argument | Check type |
+|--------|----------------------|-----------|
+| `FileSystem` | `read_file`, `write_file`, `delete_file`, `list_files` — `path` / `directory` | Hard deny if resolved path is outside sandbox |
+| `FileSystem` | `patch_file`, `create_directory`, `delete_directory`, `set_permissions`, `copy_file`, `move_file` | Hard deny if resolved path is outside sandbox (always enforced, regardless of whether `FileSystemPermissions` globs are configured) |
+| `Shell` | `shell_run`, `shell_run_script` — `workingDirectory` | Hard deny if resolved path is outside sandbox |
+| `Shell` | `shell_run`, `shell_run_script` — `command` / `script` | Best-effort scan for absolute paths escaping sandbox |
 
 ### Path resolution
 

From 16af741ea9de65deae907f8e10cf1308b6e25dfb Mon Sep 17 00:00:00 2001
From: Scott Stauffer <scott@fuseraft.com>
Date: Mon, 1 Jun 2026 21:39:18 -0500
Subject: [PATCH 7/7] fix(compaction): align window-mode trigger with trim
 estimation

- ShouldCompact was summing Usage.TotalTokens (cumulative API call cost
  per turn, growing quadratically) while TrimToWindow used chars/4;
  the trigger could fire repeatedly while the trim found nothing to drop,
  producing a stuck compaction loop in window mode
- Both now use chars/4, matching the TokenBudget calibration documented
  in CompactionConfig and sessions.md
---
 src/Orchestration/ConversationCompactor.cs | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs
index c2ce79a..b9acd1a 100644
--- a/src/Orchestration/ConversationCompactor.cs
+++ b/src/Orchestration/ConversationCompactor.cs
@@ -51,19 +51,21 @@ resumptionNote is null ? null
     /// <summary>
     /// Returns true when <paramref name="messages"/> has reached or exceeded
     /// the configured trigger. In <c>window</c> mode the trigger is the estimated
-    /// token count vs <see cref="CompactionConfig.TokenBudget"/>; in all other
-    /// modes it is the assistant-turn count vs <see cref="CompactionConfig.TriggerTurnCount"/>.
+    /// token count (characters ÷ 4) vs <see cref="CompactionConfig.TokenBudget"/>, using
+    /// the same estimate as <see cref="TrimToWindow"/> so the two stay in sync; in all
+    /// other modes it is the assistant-turn count vs <see cref="CompactionConfig.TriggerTurnCount"/>.
     /// </summary>
     public bool ShouldCompact(IReadOnlyList<AgentMessage> messages)
     {
         if (IsWindowMode)
         {
-            // Prefer provider-reported token counts when available — they include reasoning
-            // tokens that TruncateIntermediateAssistantReasoning strips from Content, so
-            // the char-based estimate would undercount them. Fall back to chars/4 only for
-            // messages that have no Usage record (e.g. injected system messages).
-            var estimated = messages.Sum(m =>
-                m.Usage is { } u ? u.TotalTokens : (m.Content?.Length ?? 0) / 4);
+            // Use the same chars/4 estimate as TrimToWindow so the trigger and the trim
+            // measure the same quantity. Usage.TotalTokens is the cumulative API call cost
+            // (InputTokens = full context at that turn, not just this message), so summing
+            // it across messages grows quadratically and diverges from the char-based budget
+            // that TokenBudget is calibrated against — causing the trigger to fire while
+            // TrimToWindow finds nothing to drop.
+            var estimated = messages.Sum(m => (m.Content?.Length ?? 0) / 4);
             if (estimated > config.TokenBudget)
             {
                 logger.LogDebug(
@@ -93,7 +95,9 @@ public bool ShouldCompact(IReadOnlyList<AgentMessage> messages)
 
     /// <summary>
     /// Drops the oldest user+assistant pairs from <paramref name="messages"/> until
-    /// the estimated token count is within <see cref="CompactionConfig.TokenBudget"/>.
+    /// the estimated token count (characters ÷ 4) is within <see cref="CompactionConfig.TokenBudget"/>.
+    /// Uses the same estimation as <see cref="ShouldCompact"/> so the trigger and the
+    /// trim always agree on when the budget is met.
     /// No LLM call is made; no summary message is injected.
     /// </summary>
     public IReadOnlyList<AgentMessage> TrimToWindow(IReadOnlyList<AgentMessage> messages)