From 31c0ee9bb2d3fb4e4a24660a715a8846d5c15d23 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 20:32:40 -0500 Subject: [PATCH 1/7] fix(orchestration): harden context token tracking and tool-result growth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Default MaxInTurnToolPairs to 12 when MaxContextTokens is set; without this, tool results accumulate O(N²) within a turn for budgeted agents that omit explicit in-turn limits - ShouldCompact (window mode) uses Usage.TotalTokens when available so reasoning tokens stripped from Content are counted toward the budget - AccumulateCompactedUsage folds all compacted-turn costs onto the summary message across every compaction path, keeping MaxTotalTokens accurate after resume cycles - TransitionAlreadyFired distinguishes blocked markers from fired ones so an A→B contract failure no longer suppresses the independent A→C signal --- src/Infrastructure/AgentFactory.cs | 7 +++- src/Orchestration/ConversationCompactor.cs | 40 +++++++++++++++---- .../StateMachineSelectionStrategy.cs | 31 ++++++++++---- 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/src/Infrastructure/AgentFactory.cs b/src/Infrastructure/AgentFactory.cs index 5a0da53..d3cde28 100644 --- a/src/Infrastructure/AgentFactory.cs +++ b/src/Infrastructure/AgentFactory.cs @@ -171,7 +171,12 @@ public AIAgent Create(AgentConfig config, Action? onToo // Deterministic sliding-window cap: always keep only the last N tool call/result // pairs in full, replacing older ones with placeholders unconditionally. // Applied before the budget-reactive trim so the window runs first. - var maxInTurnToolPairs = config.MaxInTurnToolPairs; + // When MaxContextTokens is set but no explicit pair limit is configured, default + // to 12 pairs to prevent O(N²) tool-result accumulation within a turn. + const int DefaultToolPairsWhenBudgeted = 12; + var maxInTurnToolPairs = config.MaxInTurnToolPairs > 0 + ? 
config.MaxInTurnToolPairs + : (resolvedModel.MaxContextTokens > 0 ? DefaultToolPairsWhenBudgeted : 0); // Tool schema overhead: computed once at build time since the tool list is fixed // for the lifetime of this agent. Included in the context budget and payload diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index 8402c15..86713a5 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -58,7 +58,12 @@ public bool ShouldCompact(IReadOnlyList messages) { if (IsWindowMode) { - var estimated = messages.Sum(m => (m.Content?.Length ?? 0) / 4); + // Prefer provider-reported token counts when available — they include reasoning + // tokens that TruncateIntermediateAssistantReasoning strips from Content, so + // the char-based estimate would undercount them. Fall back to chars/4 only for + // messages that have no Usage record (e.g. injected system messages). + var estimated = messages.Sum(m => + m.Usage is { } u ? u.TotalTokens : (m.Content?.Length ?? 
0) / 4); if (estimated > config.TokenBudget) { logger.LogDebug( @@ -162,6 +167,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess toCompact[0].TurnIndex, toCompact[^1].TurnIndex, cancellationToken); var intentSummary = BuildIntentDerivedSummary( toCompact[0].TurnIndex, toCompact[^1].TurnIndex, intents, prefixBlock); + intentSummary = intentSummary with { Usage = AccumulateCompactedUsage(toCompact, null) }; logger.LogInformation( "Intent compaction: {Compacted} turns replaced by intent log reconstruction ({IntentCount} intents).", toCompact.Count, intents.Count); @@ -185,6 +191,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess }; if (ExpandedNote is not null) reconstructed = reconstructed with { Content = reconstructed.Content + "\n\n---\n" + ExpandedNote }; + reconstructed = reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) }; logger.LogInformation( "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.", toCompact.Count); @@ -215,9 +222,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = true, - Usage = summUsage is not null - ? new TokenUsage(summUsage.InputTokens, summUsage.OutputTokens) - : null + Usage = AccumulateCompactedUsage(toCompact, summUsage) }; logger.LogInformation( @@ -231,7 +236,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess // LLM summary failed; return the lossless reconstruction alone so the session survives. logger.LogError(ex, "Hybrid compaction: LLM summary call failed — returning lossless reconstruction only."); - return (reconstructed, toRetain); + return (reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) }, toRetain); } } @@ -256,9 +261,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = true, - Usage = summaryUsage is not null - ? 
new TokenUsage(summaryUsage.InputTokens, summaryUsage.OutputTokens) - : null + Usage = AccumulateCompactedUsage(toCompact, summaryUsage) }; logger.LogInformation( @@ -279,6 +282,27 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess // Internals + // Sums the token costs of all compacted turns and folds in the summary-call cost. + // The total is stored on the summary AgentMessage so AgentOrchestrator can seed + // cumulativeTokens correctly on the next StreamAsync call (after resume/compaction), + // keeping MaxTotalTokens enforcement accurate across compaction boundaries. + private static TokenUsage? AccumulateCompactedUsage( + IReadOnlyList compacted, + TokenUsage? summaryCallUsage) + { + int totalInput = summaryCallUsage?.InputTokens ?? 0; + int totalOutput = summaryCallUsage?.OutputTokens ?? 0; + foreach (var m in compacted) + { + if (m.Usage is null) continue; + totalInput += m.Usage.InputTokens; + totalOutput += m.Usage.OutputTokens; + } + return (totalInput > 0 || totalOutput > 0) + ? new TokenUsage(totalInput, totalOutput) + : null; + } + private AgentMessage BuildIntentDerivedSummary( int firstTurn, int lastTurn, diff --git a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs index 9c67e3b..122ef71 100644 --- a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs +++ b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs @@ -772,21 +772,36 @@ private static bool IsSignalOnOwnLine(string content, string signal) return false; } - // Returns true when a turn-boundary marker already exists after keywordIndex for - // the target state's agent — meaning this signal was consumed in a prior turn. + // Returns true when this specific transition was already consumed after signalIndex. 
+ // + // Two marker types are checked: + // "[fuseraft:blocked {state}→{targetState}]" — the transition was evaluated and + // its contract failed; the signal must not be re-evaluated for that target. + // Markers for OTHER targets do not suppress this transition. + // Any other "[fuseraft: ...]" — a different transition fired, meaning the state + // machine already advanced; the signal is consumed regardless of target. private static bool TransitionAlreadyFired(IList history, int signalIndex, string targetState) { - // We look for "[fuseraft: X → Y]" markers after the signal message. - // Since we don't know the target agent name from here (only the target state), - // we use a simplified check: any turn-boundary marker after this index means - // the selector already processed this turn. for (int j = signalIndex + 1; j < history.Count; j++) { var m = history[j]; if (m.Role != ChatRole.User) continue; var text = m.Text; - if (!string.IsNullOrEmpty(text) && text.StartsWith("[fuseraft:", StringComparison.Ordinal)) - return true; + if (string.IsNullOrEmpty(text)) continue; + if (!text.StartsWith("[fuseraft:", StringComparison.Ordinal)) continue; + + // Blocking markers suppress only the transition they name. + // "[fuseraft:blocked A→B]" blocks A→B but must not block A→C. + if (text.StartsWith("[fuseraft:blocked ", StringComparison.Ordinal)) + { + if (text.Contains($"→{targetState}", StringComparison.OrdinalIgnoreCase)) + return true; + continue; // Different target — does not apply to this transition. + } + + // Any non-blocking marker means the state machine already acted on a signal + // in this lookback window (transition fired or parallel dispatched). 
+ return true; } return false; } From 7a5380750cf41095c953b2abee851eb27ce6be92 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 20:38:11 -0500 Subject: [PATCH 2/7] fix(orchestration): pin corrections through trim and context assembly - Corrections injected mid-session were silently dropped for Context-spec agents because AssembleForAgentAsync bypasses shared-history replay; ExtractPendingCorrections re-injects any correction messages that appear after the agent's last assistant turn so retries always see the feedback - MaxTailMessages cut by raw position, so earlier corrections on long histories could fall outside the retained window; step 4 now pins correction messages and applies the tail limit only to non-pinned messages, preserving original order - IsCorrectionMessage is public so both paths share the same detection logic (prefix table + [fuseraft:blocked substring) --- src/Orchestration/ContextWindowFilter.cs | 68 ++++++++++++++++++++- src/Orchestration/HandoffContextResolver.cs | 37 +++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs index 4809188..0643319 100644 --- a/src/Orchestration/ContextWindowFilter.cs +++ b/src/Orchestration/ContextWindowFilter.cs @@ -118,8 +118,37 @@ public static IReadOnlyList Apply( } // Step 4: Tail limit — keep only the last N messages. + // Correction messages (RETRY, STAGNATION, [fuseraft:blocked, etc.) are pinned so they + // always survive the position-based cut. Non-correction messages are trimmed to the tail + // window; the final list preserves original message order. 
if (window.MaxTailMessages > 0 && list.Count > window.MaxTailMessages) - list = list.Skip(list.Count - window.MaxTailMessages).ToList(); + { + var pinnedSet = new HashSet( + Enumerable.Range(0, list.Count).Where(i => IsCorrectionMessage(list[i]))); + + if (pinnedSet.Count == 0) + { + list = list.Skip(list.Count - window.MaxTailMessages).ToList(); + } + else + { + var unpinnedIndices = Enumerable.Range(0, list.Count) + .Where(i => !pinnedSet.Contains(i)) + .ToList(); + + int firstKeptUnpinned = unpinnedIndices.Count > window.MaxTailMessages + ? unpinnedIndices[unpinnedIndices.Count - window.MaxTailMessages] + : 0; + + var kept = new List(list.Count); + for (int i = 0; i < list.Count; i++) + { + if (i >= firstKeptUnpinned || pinnedSet.Contains(i)) + kept.Add(list[i]); + } + list = kept; + } + } // Step 5: Sanitize tool_use/tool_result pairing at slice boundaries. // Steps 3 and 4 cut by position; either cut can land inside a tool-call/result @@ -342,6 +371,43 @@ private static List SanitizeToolPairs(List list) return result; } + // Prefixes that unambiguously identify a ChatRole.User correction injected by + // CorrectionEngine, routing strategies, or the orchestrator's verifier hook. + private static readonly string[] CorrectionPrefixes = + [ + "RETRY ", + "NO TOOL CALLS", + "CRITICAL:", + "APPROVED rejected:", + "WRONG KEYWORD:", + "JSON block correct", + "BUILD FAILURE:", + "STAGNATION (", + "STUCK ", + "HALLUCINATION:", + "PERSISTENT BUILD FAILURE", + "VERIFICATION FINDING", + "Files written this turn", + "No handoff keyword", + ]; + + /// <summary> + /// Returns true when <paramref name="message"/> is a correction injected by + /// <see cref="fuseraft.Orchestration.Workflow.CorrectionEngine"/>, a routing strategy, + /// or the orchestrator's verifier hook. Used to pin corrections so they survive + /// <see cref="ContextWindowConfig.MaxTailMessages"/> trimming, and to re-inject them + /// into assembled agent contexts. + /// </summary> + public static bool IsCorrectionMessage(ChatMessage message) + { + if (message.Role != ChatRole.User) return false; + var text = message.Text ?? 
string.Empty; + if (text.Contains("[fuseraft:blocked", StringComparison.Ordinal)) return true; + foreach (var prefix in CorrectionPrefixes) + if (text.StartsWith(prefix, StringComparison.Ordinal)) return true; + return false; + } + // Maximum number of characters to replay from a single non-summary assistant message. // Agents sometimes produce verbose stream-of-consciousness reasoning text (3–5k output // tokens). When that text is replayed verbatim in every subsequent turn it causes diff --git a/src/Orchestration/HandoffContextResolver.cs b/src/Orchestration/HandoffContextResolver.cs index d5c5602..db395c0 100644 --- a/src/Orchestration/HandoffContextResolver.cs +++ b/src/Orchestration/HandoffContextResolver.cs @@ -167,6 +167,14 @@ public async Task> AssembleForAgentAsync( } } + // 4. Pending corrections — user correction messages injected into shared history after + // this agent's last turn. Context-spec agents replace shared-history replay entirely, + // so corrections written to shared history (by CorrectionEngine, routing strategies, + // or the verifier hook) would otherwise be invisible on the next invocation. Re-inject + // them here so the agent always sees the most recent feedback addressed to it. + var pendingCorrections = ExtractPendingCorrections(agentName, sharedHistory); + result.AddRange(pendingCorrections); + return result; } @@ -314,6 +322,35 @@ private static IReadOnlyList ExtractOwnHistory( return ownTurns.Select(t => t.Msg).ToList(); } + // ── Pending-correction extraction ─────────────────────────────────────── + + // Returns all correction messages in shared history that appear after the last + // assistant turn by agentName. These are unread corrections the agent has not yet + // acted on; they must be included in the assembled context so the agent sees them. 
+ private static IReadOnlyList ExtractPendingCorrections( + string agentName, + IList history) + { + int lastOwnIdx = -1; + for (int i = history.Count - 1; i >= 0; i--) + { + if (history[i].Role == ChatRole.Assistant && + string.Equals(history[i].AuthorName, agentName, StringComparison.OrdinalIgnoreCase)) + { + lastOwnIdx = i; + break; + } + } + + var corrections = new List(); + for (int i = lastOwnIdx + 1; i < history.Count; i++) + { + if (ContextWindowFilter.IsCorrectionMessage(history[i])) + corrections.Add(history[i]); + } + return corrections; + } + // ── Helpers ────────────────────────────────────────────────────────────── private static (string Type, string? Param) ParseSource(string source) From fcadecd198f99f6b22d2a472970a794b61ff0e0b Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 20:43:29 -0500 Subject: [PATCH 3/7] feat(orchestration): per-tool result caps and tool call compaction - Global MaxToolResultChars cut search/grep at the same limit as file reads; ToolResultCharOverrides lets callers raise the cap per tool without relaxing the global default - ToolCallRecord entries were silently lost for compacted turns; AccumulateCompactedToolCalls now wires them into all five summary paths so telemetry and BuildModifiedFilesNote stay accurate --- src/Core/Models/ContextWindowConfig.cs | 19 ++++++++++ src/Orchestration/ContextWindowFilter.cs | 42 +++++++++++++++++++--- src/Orchestration/ConversationCompactor.cs | 38 +++++++++++++++++--- 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/Core/Models/ContextWindowConfig.cs b/src/Core/Models/ContextWindowConfig.cs index e6154ca..f262f44 100644 --- a/src/Core/Models/ContextWindowConfig.cs +++ b/src/Core/Models/ContextWindowConfig.cs @@ -105,4 +105,23 @@ public sealed record ContextWindowConfig /// Default: 0 (no truncation). /// public int MaxToolResultChars { get; init; } + + /// + /// Per-tool-name character limit overrides applied during tool result truncation. 
+ /// When a key matches a tool function name (case-insensitive), its value is used as the + /// character cap for that tool's results instead of <see cref="MaxToolResultChars"/>. + /// + /// + /// The primary use case is giving search and grep tools a higher limit than file-read + /// tools. For example: + /// + /// "ToolResultCharOverrides": { "search_content": 20000, "grep_file": 20000 } + /// + /// A value of 0 disables truncation for that tool entirely. + /// + /// + /// Only meaningful when <see cref="MaxToolResultChars"/> is also set. + /// Default: empty (no overrides). + /// + public Dictionary ToolResultCharOverrides { get; init; } = []; } diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs index 0643319..266db0e 100644 --- a/src/Orchestration/ContextWindowFilter.cs +++ b/src/Orchestration/ContextWindowFilter.cs @@ -164,7 +164,7 @@ public static IReadOnlyList Apply( // When MaxToolResultChars is set, any FunctionResultContent string that exceeds // the limit is truncated and annotated with the omitted character count. if (window.MaxToolResultChars > 0) - list = TruncateToolResults(list, window.MaxToolResultChars); + list = TruncateToolResults(list, window.MaxToolResultChars, window.ToolResultCharOverrides); return list; } @@ -174,7 +174,10 @@ public static IReadOnlyList Apply( // The rest is elided — the model's mental model of the file is stale at that point anyway. private const int ConsumedReadCapChars = 500; - private static List TruncateToolResults(List list, int maxChars) + private static List TruncateToolResults( + List list, + int maxChars, + IReadOnlyDictionary? overrides = null) { // Fast path: no ChatRole.Tool messages in the slice. if (!list.Any(m => m.Role == ChatRole.Tool)) return list; @@ -184,6 +187,16 @@ private static List TruncateToolResults(List list, int // the model hasn't yet acted on are left at the normal maxChars limit. 
var consumedReadIds = BuildConsumedReadCallIds(list); + // Build callId → toolName so per-tool overrides can be resolved for each result. + var callToolNames = new Dictionary(StringComparer.Ordinal); + foreach (var msg in list) + { + if (msg.Role != ChatRole.Assistant) continue; + foreach (var c in msg.Contents) + if (c is FunctionCallContent fc && fc.CallId is not null) + callToolNames[fc.CallId] = fc.Name ?? string.Empty; + } + var result = new List(list.Count); foreach (var msg in list) { @@ -211,10 +224,29 @@ private static List TruncateToolResults(List list, int $"file was written or patched later this session; " + $"call read_file again if current content is needed]"; } - else if (s.Length > maxChars) + else { - truncated = s[..maxChars] + - $"\n[...truncated — {s.Length - maxChars:N0} chars omitted to reduce context size...]"; + // Resolve the per-tool limit: check overrides first, then fall back to maxChars. + // A zero override value disables truncation for that tool entirely. + int limit = maxChars; + if (overrides is { Count: > 0 } && + callToolNames.TryGetValue(fr.CallId ?? 
string.Empty, out var toolName)) + { + foreach (var kv in overrides) + { + if (string.Equals(kv.Key, toolName, StringComparison.OrdinalIgnoreCase)) + { + limit = kv.Value; + break; + } + } + } + + if (limit > 0 && s.Length > limit) + { + truncated = s[..limit] + + $"\n[...truncated — {s.Length - limit:N0} chars omitted to reduce context size...]"; + } } if (truncated is not null) diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index 86713a5..555685f 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -167,7 +167,11 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess toCompact[0].TurnIndex, toCompact[^1].TurnIndex, cancellationToken); var intentSummary = BuildIntentDerivedSummary( toCompact[0].TurnIndex, toCompact[^1].TurnIndex, intents, prefixBlock); - intentSummary = intentSummary with { Usage = AccumulateCompactedUsage(toCompact, null) }; + intentSummary = intentSummary with + { + Usage = AccumulateCompactedUsage(toCompact, null), + ToolCalls = AccumulateCompactedToolCalls(toCompact), + }; logger.LogInformation( "Intent compaction: {Compacted} turns replaced by intent log reconstruction ({IntentCount} intents).", toCompact.Count, intents.Count); @@ -191,7 +195,11 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess }; if (ExpandedNote is not null) reconstructed = reconstructed with { Content = reconstructed.Content + "\n\n---\n" + ExpandedNote }; - reconstructed = reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) }; + reconstructed = reconstructed with + { + Usage = AccumulateCompactedUsage(toCompact, null), + ToolCalls = AccumulateCompactedToolCalls(toCompact), + }; logger.LogInformation( "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.", toCompact.Count); @@ -222,7 +230,8 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = 
true, - Usage = AccumulateCompactedUsage(toCompact, summUsage) + Usage = AccumulateCompactedUsage(toCompact, summUsage), + ToolCalls = AccumulateCompactedToolCalls(toCompact), }; logger.LogInformation( @@ -261,7 +270,8 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = true, - Usage = AccumulateCompactedUsage(toCompact, summaryUsage) + Usage = AccumulateCompactedUsage(toCompact, summaryUsage), + ToolCalls = AccumulateCompactedToolCalls(toCompact), }; logger.LogInformation( @@ -276,12 +286,30 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess logger.LogError(ex, "LLM compaction failed; inserting fallback marker for turns {First}–{Last}.", toCompact[0].TurnIndex, toCompact[^1].TurnIndex); - return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message), toRetain); + return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message) + with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }, toRetain); } } // Internals + // Collects all ToolCallRecord entries from the compacted turns into a flat list so the + // summary message preserves them. Downstream consumers (telemetry, BuildModifiedFilesNote) + // inspect ToolCalls on AgentMessages; without this they silently drop records for any turn + // that was compacted, producing incomplete data for succeeded/failed tool tracking. + private static IReadOnlyList? AccumulateCompactedToolCalls( + IReadOnlyList compacted) + { + List? all = null; + foreach (var m in compacted) + { + if (m.ToolCalls is not { Count: > 0 }) continue; + all ??= []; + all.AddRange(m.ToolCalls); + } + return all; + } + // Sums the token costs of all compacted turns and folds in the summary-call cost. 
// The total is stored on the summary AgentMessage so AgentOrchestrator can seed // cumulativeTokens correctly on the next StreamAsync call (after resume/compaction), From 83fda6ac9e7a42538ab30fdbcabcc3b7eda16218 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 20:53:12 -0500 Subject: [PATCH 4/7] fix(orchestration): budget guard, anti-thrash, sub-agent stagnation - AgentOrchestrator: add EstimateContextTokens helper and a pre-turn budget guard that aborts before agent.RunAsync when cumulativeTokens + estimated input tokens > MaxTotalTokens, preventing expensive one-turn overshoots. - CompactionConfig: raise AntiThrashWindow default from 3 to 10 so a single productive compaction no longer resets the guard in long sessions. - CorrectionEngine / GraphOrchestrator: add optional turnToolCalls parameter to InjectNoKeywordCorrection; the no-tool-calls gate also checks AgentMessage.ToolCalls so SubAgentPlugin responses are not misclassified as stagnation. --- src/Core/Models/CompactionConfig.cs | 4 +-- src/Orchestration/AgentOrchestrator.cs | 35 +++++++++++++++++++ src/Orchestration/GraphOrchestrator.cs | 6 ++-- .../Workflow/CorrectionEngine.cs | 9 +++-- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/Core/Models/CompactionConfig.cs b/src/Core/Models/CompactionConfig.cs index 508a791..f0cdd20 100644 --- a/src/Core/Models/CompactionConfig.cs +++ b/src/Core/Models/CompactionConfig.cs @@ -111,7 +111,7 @@ public record CompactionConfig /// /// Number of recent compaction outcomes to examine for the anti-thrash guard. /// Only suppresses compaction once this many outcomes have been recorded. - /// Default: 3. Set to 0 to disable the anti-thrash check. + /// Default: 10. Set to 0 to disable the anti-thrash check. 
/// - public int AntiThrashWindow { get; init; } = 3; + public int AntiThrashWindow { get; init; } = 10; } diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs index 9d639b1..613c3f4 100644 --- a/src/Orchestration/AgentOrchestrator.cs +++ b/src/Orchestration/AgentOrchestrator.cs @@ -475,6 +475,22 @@ await eventEmitter.EmitAsync("turn_end", history.Count, filtered.Count); + // Pre-turn budget guard: estimate the input token cost of this context slice and + // abort before the LLM call if cumulative + estimated input would exceed the limit. + // Prevents the one-turn overshoot that occurs when the post-yield check fires too + // late (e.g. a file-read turn that consumes tens of thousands of tokens). + if (config.MaxTotalTokens is { } preTurnLimit) + { + var estimatedInputTokens = EstimateContextTokens(context); + if (cumulativeTokens + estimatedInputTokens > preTurnLimit) + { + logger.LogWarning( + "[Orchestrator] Pre-turn budget guard: cumulative {Cumulative:N0} + estimated input {Estimated:N0} > limit {Limit:N0} — aborting before turn.", + cumulativeTokens, estimatedInputTokens, preTurnLimit); + throw new BudgetExceededException(cumulativeTokens + estimatedInputTokens, preTurnLimit); + } + } + AgentResponse response = governanceKernel?.CircuitBreaker is { } cb ? await cb.ExecuteAsync(() => agent.RunAsync(context, null, null, cancellationToken)) : await agent.RunAsync(context, null, null, cancellationToken); @@ -769,4 +785,23 @@ private static void WireDidResolver(ITerminationCondition condition, Func Guid.NewGuid().ToString("N")[..8]; + + // Estimates the input token cost of a context slice by summing all content chars across + // message types and dividing by 4. Used for the pre-turn budget guard; intentionally + // conservative (actual tokenisation may differ but is rarely smaller than chars/4). 
+ private static int EstimateContextTokens(IEnumerable messages) + { + int chars = 0; + foreach (var msg in messages) + foreach (var content in msg.Contents) + chars += content switch + { + TextContent tc => tc.Text?.Length ?? 0, + FunctionCallContent fc => (fc.Name?.Length ?? 0) + + (fc.Arguments?.Values.Sum(v => v?.ToString()?.Length ?? 0) ?? 0), + FunctionResultContent fr => fr.Result?.ToString()?.Length ?? 0, + _ => 0, + }; + return chars / 4; + } } diff --git a/src/Orchestration/GraphOrchestrator.cs b/src/Orchestration/GraphOrchestrator.cs index 028a5c3..abd5a8c 100644 --- a/src/Orchestration/GraphOrchestrator.cs +++ b/src/Orchestration/GraphOrchestrator.cs @@ -1165,7 +1165,8 @@ await eventEmitter.EmitAsync("no_keyword", int histBefore2 = ctx.History.Count; await CorrectionEngine.InjectNoKeywordCorrection( - ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter); + ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter, + agentMsg.ToolCalls); await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false); if (consecutiveFails >= maxRetries) @@ -1623,7 +1624,8 @@ await eventEmitter.EmitAsync("no_keyword", int histBefore2 = ctx.History.Count; await CorrectionEngine.InjectNoKeywordCorrection( - ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter); + ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter, + agentMsg.ToolCalls); await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false); if (consecutiveFails >= maxRetries) diff --git a/src/Orchestration/Workflow/CorrectionEngine.cs b/src/Orchestration/Workflow/CorrectionEngine.cs index 2291fe6..61ac8ff 100644 --- a/src/Orchestration/Workflow/CorrectionEngine.cs +++ b/src/Orchestration/Workflow/CorrectionEngine.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.AI; using fuseraft.Core; +using fuseraft.Core.Models; namespace fuseraft.Orchestration.Workflow; @@ -43,7 +44,8 @@ internal static 
async Task InjectNoKeywordCorrection( string agentName, int consecutiveCount, AgentRouteTable routeTable, - EventEmitter? eventEmitter = null) + EventEmitter? eventEmitter = null, + IReadOnlyList? turnToolCalls = null) { var validKeywordList = BuildValidKeywordList(routeTable); bool isReviewerType = routeTable.PhaseBreakKeywords.Contains("APPROVED"); @@ -51,7 +53,10 @@ internal static async Task InjectNoKeywordCorrection( if (TryInjectForeignKeywordCorrection(history, responseText, routeTable, agentName, validKeywordList)) return; if (TryInjectCodeBlockCorrection(history, responseText, isReviewerType, validKeywordList)) return; - if (!CurrentTurnHasToolCalls(history)) + // Also treat as "has tool calls" when the AgentMessage records sub-agent tool calls + // that ran inside a SubAgentPlugin — those don't produce ChatRole.Tool entries in the + // outer history so CurrentTurnHasToolCalls would return false without this check. + if (!CurrentTurnHasToolCalls(history) && (turnToolCalls is null || turnToolCalls.Count == 0)) { InjectNoToolCallsCorrection(history, isReviewerType, validKeywordList); return; From 245d22d59ec830612c4f5400071558f4e938651a Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 21:16:41 -0500 Subject: [PATCH 5/7] fix(orchestration): intent fallback, sandbox writes, evidence verifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - intent mode silently downgraded when intentLog is null; agents resuming after compaction had no signal the summary was degraded — now prepends a visible COMPACTION WARNING block and emits a startup LogWarning when Compaction.Mode=intent but no IntentLogPath is configured - patch_file / create_directory / delete_directory / set_permissions / copy_file / move_file bypassed sandbox boundary check when no FileSystemPermissions globs were configured; added SandboxedExtendedWriteFunctions so these always route through InspectFileSystem regardless of glob state - 
verifier post-turn block only fired on EveryNTurns; TriggerOnSuspiciousTransition had no effect outside StateMachineSelectionStrategy — wired HasSuspiciousTransitionSignal to detect ConflictingEvidence/NoProgress corrections injected by SelectAsync and trigger the verifier immediately; pins EVIDENCE INCONSISTENCY / EVIDENCE AUDIT REQUIRED / MISSING ARTIFACT in CorrectionPrefixes so they survive MaxTailMessages trim - expose ReadSessionContextAsync on ContextAssembler and auto-inject session context summary for agents without an explicit Context spec - default IncludeReasoning and IncludeSymbolGraph to true; add MaxReplayChars to ContextWindowConfig with per-agent TruncateAssistantContent step in Apply --- src/Cli/OrchestratorBuilder.cs | 9 +++ src/Core/Models/CompactionConfig.cs | 12 ++-- src/Core/Models/ContextWindowConfig.cs | 15 +++++ .../Plugins/SandboxEnforcementFilter.cs | 15 ++++- src/Orchestration/AgentOrchestrator.cs | 59 ++++++++++++++++--- src/Orchestration/ContextWindowFilter.cs | 56 +++++++++++++++--- src/Orchestration/ConversationCompactor.cs | 22 +++++-- src/Orchestration/HandoffContextResolver.cs | 8 +++ 8 files changed, 169 insertions(+), 27 deletions(-) diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index ee3c345..e164e9f 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -713,6 +713,15 @@ t.Pattern is not null || chatClientFactory.Create(summaryModel), compactionConfig, loggerFactory.CreateLogger(), resumptionNote, changeLogPath, intentLog, config.Events?.Path, evidenceStore); + + if ((compactionConfig.Mode ?? string.Empty).Equals("intent", StringComparison.OrdinalIgnoreCase) + && intentLog is null) + { + loggerFactory.CreateLogger(nameof(OrchestratorBuilder)).LogWarning( + "Compaction.Mode is 'intent' but no ChangeTracking.IntentLogPath is configured — " + + "compaction will fall back to lossless or LLM mode at runtime. 
" + + "Set ChangeTracking.IntentLogPath to enable deterministic intent compaction."); + } } // Build the post-session skill curator when curation is enabled. diff --git a/src/Core/Models/CompactionConfig.cs b/src/Core/Models/CompactionConfig.cs index f0cdd20..d6e4a96 100644 --- a/src/Core/Models/CompactionConfig.cs +++ b/src/Core/Models/CompactionConfig.cs @@ -67,19 +67,21 @@ public record CompactionConfig /// When true, reasoning excerpts from the compacted turn range are prepended to /// the compaction summary. Each excerpt is truncated to approximately 500 tokens so agents /// resuming after compaction can see the WHY behind prior decisions, not just the artifacts. - /// Reads reasoning events from the session's events log. Default: false. + /// Reads reasoning events from the session's events log. When the events log is + /// absent or contains no reasoning events the block is omitted silently. + /// Default: true. /// - public bool IncludeReasoning { get; init; } = false; + public bool IncludeReasoning { get; init; } = true; /// /// When true, a symbol dependency graph derived from the session's changed files is /// prepended to the compaction summary (before reasoning excerpts when both are enabled). /// Queries SymbolDefinition and SymbolReference nodes from the evidence store /// for every file written during the session, giving agents an explicit map of what symbols - /// were in scope across the compacted turns. Requires an active EvidenceStore. - /// Default: false. + /// were in scope across the compacted turns. When no evidence store is wired or no symbol + /// nodes are found the block is omitted silently. Default: true. /// - public bool IncludeSymbolGraph { get; init; } = false; + public bool IncludeSymbolGraph { get; init; } = true; /// /// Optional custom prompt template for LLM-mode compaction. 
When set, replaces the diff --git a/src/Core/Models/ContextWindowConfig.cs b/src/Core/Models/ContextWindowConfig.cs index f262f44..dcb5874 100644 --- a/src/Core/Models/ContextWindowConfig.cs +++ b/src/Core/Models/ContextWindowConfig.cs @@ -106,6 +106,21 @@ public sealed record ContextWindowConfig /// public int MaxToolResultChars { get; init; } + /// + /// Maximum characters to replay from a single non-summary assistant message in the + /// history slice passed to this agent. When an assistant message text exceeds this limit + /// the content is truncated and annotated with the omitted character count. + /// + /// + /// Agents sometimes produce multi-thousand-character reasoning blocks that are replayed + /// verbatim on every subsequent turn, compounding input-token growth. Compaction-summary + /// messages are never truncated regardless of this setting. + /// + /// + /// Default: 0 (uses the global 2,000-char fallback applied during session replay). + /// + public int MaxReplayChars { get; init; } + /// /// Per-tool-name character limit overrides applied during tool result truncation. /// When a key matches a tool function name (case-insensitive), its value is used as the diff --git a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs index ef279a2..8da4ab1 100644 --- a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs +++ b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs @@ -74,6 +74,15 @@ public sealed class SandboxEnforcementFilter private static readonly string[] FileSystemFunctions = ["read_file", "write_file", "delete_file", "list_files"]; + // Write-type extended functions that must always be routed through InspectFileSystem for + // sandbox boundary checks, even when no FileSystemPermissions glob matchers are configured. + // These functions create, modify, or remove paths and must stay within the sandbox root. 
+ private static readonly HashSet SandboxedExtendedWriteFunctions = new(StringComparer.OrdinalIgnoreCase) + { + "patch_file", "create_directory", "delete_directory", "set_permissions", + "copy_file", "move_file", + }; + private static readonly string[] ShellFunctions = ["shell_run", "shell_run_script"]; @@ -205,11 +214,13 @@ public AIAgent WrapAgent(AIAgent agent) => var ringDenial = InspectRing(functionName); if (ringDenial is not null) return ringDenial; - // Core FS functions are always sandboxed; extended functions are routed when any glob - // matcher is configured so they get sandbox + deny/read/write checks. + // Core FS functions are always sandboxed; write-type extended functions are also + // always sandboxed (boundary check only). Other extended functions are routed when + // any glob matcher is configured so they get sandbox + deny/read/write checks. bool hasGlobMatcher = _fsDenyMatcher is not null || _fsReadMatcher is not null || _fsWriteMatcher is not null; bool isFsFunction = FileSystemFunctions.Any(f => string.Equals(f, functionName, StringComparison.OrdinalIgnoreCase)) + || SandboxedExtendedWriteFunctions.Contains(functionName) || (hasGlobMatcher && AllExtendedFsFunctions.Contains(functionName)); if (isFsFunction) diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs index 613c3f4..46216c7 100644 --- a/src/Orchestration/AgentOrchestrator.cs +++ b/src/Orchestration/AgentOrchestrator.cs @@ -419,7 +419,11 @@ await eventEmitter.EmitAsync("turn_end", } // Select the next agent. + // Capture the history count before selection so correction messages injected by + // the strategy (ConflictingEvidence / NoProgress) can be identified afterwards. 
+ int preSelectCount = history.Count; var agent = await selection.SelectAsync(agents, history, cancellationToken); + int postSelectCount = history.Count; if (agent is null) break; logger.LogDebug( @@ -445,7 +449,9 @@ await eventEmitter.EmitAsync("turn_end", // sees only what it needs rather than the full session transcript. The shared // history list is still updated after the turn so routing/termination strategies // continue to work normally. - // When no Context spec is set, fall back to the traditional ContextWindow filter. + // When no Context spec is set, fall back to the traditional ContextWindow filter + // and auto-inject the session context summary (context_summary.md) as the second + // message when it exists, preventing agents from wasting turns re-reading brief.json. var agentCfg = agentConfigs.GetValueOrDefault(agent.Name ?? ""); IReadOnlyList filtered; if (agentCfg?.Context is { Count: > 0 } agentContextSources && contextAssembler is not null) @@ -459,7 +465,22 @@ await eventEmitter.EmitAsync("turn_end", } else { - filtered = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow); + var raw = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow); + if (contextAssembler is not null) + { + var sessionCtx = await contextAssembler.ReadSessionContextAsync(cancellationToken); + if (sessionCtx is not null) + { + var withCtx = new List(raw.Count + 1); + if (raw.Count > 0) withCtx.Add(raw[0]); + withCtx.Add(new ChatMessage(ChatRole.User, + $"[Session Context]\n\n{sessionCtx.Trim()}")); + withCtx.AddRange(raw.Skip(1)); + filtered = withCtx; + } + else filtered = raw; + } + else filtered = raw; } IEnumerable context = (hasInstructions || memoryManager is not null) && instructions is not null @@ -597,13 +618,16 @@ await eventEmitter.EmitAsync("reasoning", if (memoryManager is not null) await memoryManager.PostTurnAsync(agentMessage.AgentName, [..history], cancellationToken); - // Periodic verifier: run the meta-agent every N turns to audit evidence. 
- // Skipped when the verifier itself just ran to prevent self-loops. - if (config.Verifier is { EveryNTurns: > 0 } verCfg + // Periodic verifier: run the meta-agent every N turns to audit evidence, OR + // immediately when a ConflictingEvidence / NoProgress correction was injected this + // turn (evidence-driven trigger). Skipped when the verifier itself just ran. + if (config.Verifier is { } verCfg && verifierAgent is not null - && agentMessage.TurnIndex > 0 - && agentMessage.TurnIndex % verCfg.EveryNTurns == 0 - && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase)) + && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase) + && ( + (verCfg.EveryNTurns > 0 && agentMessage.TurnIndex > 0 && agentMessage.TurnIndex % verCfg.EveryNTurns == 0) + || (verCfg.TriggerOnSuspiciousTransition && HasSuspiciousTransitionSignal(history, preSelectCount, postSelectCount)) + )) { AgentStarting?.Invoke(verifierAgent.Name ?? "Verifier"); agentFactory.OnAgentTurnStarting(); @@ -786,6 +810,25 @@ private static void WireDidResolver(ITerminationCondition condition, Func Guid.NewGuid().ToString("N")[..8]; + // Scans messages at indices [from, to) for ConflictingEvidence or NoProgress correction + // signals injected by the selection strategy. Returns true when any such signal is found, + // indicating the verifier should audit the current turn's output. + private static bool HasSuspiciousTransitionSignal(IList history, int from, int to) + { + for (int i = from; i < to && i < history.Count; i++) + { + var msg = history[i]; + if (msg.Role != ChatRole.User) continue; + var text = msg.Text ?? 
string.Empty; + if (text.StartsWith("NO TOOL CALLS", StringComparison.Ordinal) || + text.StartsWith("CRITICAL:", StringComparison.Ordinal) || + text.Contains("EVIDENCE INCONSISTENCY", StringComparison.Ordinal) || + text.Contains("EVIDENCE AUDIT REQUIRED", StringComparison.Ordinal)) + return true; + } + return false; + } + // Estimates the input token cost of a context slice by summing all content chars across // message types and dividing by 4. Used for the pre-turn budget guard; intentionally // conservative (actual tokenisation may differ but is rarely smaller than chars/4). diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs index 266db0e..cdc8cfd 100644 --- a/src/Orchestration/ContextWindowFilter.cs +++ b/src/Orchestration/ContextWindowFilter.cs @@ -166,9 +166,48 @@ public static IReadOnlyList Apply( if (window.MaxToolResultChars > 0) list = TruncateToolResults(list, window.MaxToolResultChars, window.ToolResultCharOverrides); + // Step 7: Truncate verbose assistant messages. + // When MaxReplayChars is set, assistant text content that exceeds the limit is + // truncated. Compaction-summary messages (marked by their header prefix) are exempt. + if (window.MaxReplayChars > 0) + list = TruncateAssistantContent(list, window.MaxReplayChars); + return list; } + private static List TruncateAssistantContent(List list, int maxChars) + { + var result = new List(list.Count); + foreach (var msg in list) + { + if (msg.Role != ChatRole.Assistant) + { + result.Add(msg); + continue; + } + + var textContent = string.Concat(msg.Contents.OfType().Select(t => t.Text)); + // Compaction summaries are already compact — skip them unconditionally. 
+ if (textContent.StartsWith("[CONVERSATION SUMMARY", StringComparison.Ordinal) || + textContent.Length <= maxChars) + { + result.Add(msg); + continue; + } + + var truncated = textContent[..maxChars] + + $"\n[...truncated — {textContent.Length - maxChars:N0} chars omitted to reduce context size...]"; + + var newContents = msg.Contents + .Where(c => c is not TextContent) + .Prepend(new TextContent(truncated)) + .ToList(); + + result.Add(new ChatMessage(ChatRole.Assistant, newContents) { AuthorName = msg.AuthorName }); + } + return result; + } + // How much of a consumed read_file result to keep for structural context (file shape, // imports, class header) after a downstream write/patch confirms the content was acted on. // The rest is elided — the model's mental model of the file is stale at that point anyway. @@ -421,6 +460,9 @@ private static List SanitizeToolPairs(List list) "VERIFICATION FINDING", "Files written this turn", "No handoff keyword", + "EVIDENCE INCONSISTENCY", // ConflictingEvidence (KeywordSelectionStrategy) + "EVIDENCE AUDIT REQUIRED", // ConflictingEvidence (StateMachineSelectionStrategy) + "MISSING ARTIFACT", // MissingEvidence (both strategies) ]; /// @@ -440,28 +482,28 @@ public static bool IsCorrectionMessage(ChatMessage message) return false; } - // Maximum number of characters to replay from a single non-summary assistant message. + // Global default applied during checkpoint-resume replay when no per-agent limit is set. // Agents sometimes produce verbose stream-of-consciousness reasoning text (3–5k output // tokens). When that text is replayed verbatim in every subsequent turn it causes // compaction summaries to grow each cycle and in-turn input tokens to balloon (450k+). // Compaction summaries (IsCompactionSummary) are already compact and are never truncated. 
- private const int MaxReplayChars = 2_000; + internal const int DefaultMaxReplayChars = 2_000; /// /// Returns the content string to replay for into the next /// StreamAsync call's history. Verbose non-summary assistant messages are - /// truncated at to prevent compounding context growth. + /// truncated at to prevent compounding context growth. /// - public static string TruncateReplayContent(AgentMessage message) + public static string TruncateReplayContent(AgentMessage message, int maxReplayChars = DefaultMaxReplayChars) { var content = message.Content ?? string.Empty; if (message.IsCompactionSummary || message.Role != "assistant" - || content.Length <= MaxReplayChars) + || content.Length <= maxReplayChars) return content; - return content[..MaxReplayChars] + - $"\n[...truncated — {content.Length - MaxReplayChars:N0} chars omitted to reduce context size...]"; + return content[..maxReplayChars] + + $"\n[...truncated — {content.Length - maxReplayChars:N0} chars omitted to reduce context size...]"; } } diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index 555685f..c2ce79a 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -159,6 +159,9 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess var prefixBlock = CombineBlocks(symbolBlock, reasoningBlock); // Intent mode: reconstruct from the intent log — fully deterministic, no LLM call. + // When the intent log is unavailable, record a visible fallback notice so agents + // resuming after compaction know the summary was degraded. + string? intentFallbackNotice = null; if (mode == "intent") { if (intentLog is not null) @@ -179,7 +182,12 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess } logger.LogWarning( - "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm."); + "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm. 
" + + "Configure ChangeTracking.IntentLogPath to enable deterministic intent compaction."); + intentFallbackNotice = + "[COMPACTION WARNING: 'intent' mode was requested but no intent log is wired — " + + "this summary was generated using fallback compaction (lossless or LLM). " + + "Configure ChangeTracking.IntentLogPath to suppress this warning.]"; // Fall through to lossless / llm. } @@ -203,7 +211,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess logger.LogInformation( "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.", toCompact.Count); - return (reconstructed, toRetain); + return (PrependFallbackNotice(reconstructed, intentFallbackNotice), toRetain); } // Hybrid: prepend reconstruction before the LLM summary. @@ -278,7 +286,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess "Compaction complete. Turns 0–{Last} replaced by summary.", toCompact[^1].TurnIndex); - return (summary, toRetain); + return (PrependFallbackNotice(summary, intentFallbackNotice), toRetain); } catch (OperationCanceledException) { throw; } catch (Exception ex) @@ -286,8 +294,9 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess logger.LogError(ex, "LLM compaction failed; inserting fallback marker for turns {First}–{Last}.", toCompact[0].TurnIndex, toCompact[^1].TurnIndex); - return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message) - with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }, toRetain); + var fallback = BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message) + with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }; + return (PrependFallbackNotice(fallback, intentFallbackNotice), toRetain); } } @@ -528,6 +537,9 @@ private void RecordSavings(double ratio) _recentSavings.Dequeue(); } + private static AgentMessage PrependFallbackNotice(AgentMessage msg, string? notice) => + notice is null ? 
msg : msg with { Content = notice + "\n\n" + msg.Content }; + private AgentMessage BuildFallbackSummary(int firstTurn, int lastTurn, string errorMessage) { var content = diff --git a/src/Orchestration/HandoffContextResolver.cs b/src/Orchestration/HandoffContextResolver.cs index db395c0..b730bf9 100644 --- a/src/Orchestration/HandoffContextResolver.cs +++ b/src/Orchestration/HandoffContextResolver.cs @@ -54,6 +54,14 @@ public ContextAssembler( public void SetSessionId(string sessionId) => _sessionId = sessionId; + /// + /// Returns the current session context summary, or null when the file does not + /// exist or is empty. Used by orchestrators to auto-inject context for agents that do not + /// declare an explicit Context spec. + /// + public Task ReadSessionContextAsync(CancellationToken ct = default) + => ResolveSessionContextAsync(ct); + // ── Handoff injection (state machine transitions) ──────────────────────── /// From 4f47483b0a1e978f00c61f1fa1c02b10d7446394 Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 21:21:35 -0500 Subject: [PATCH 6/7] docs: sync ContextWindow fields, compaction defaults, sandbox coverage - IncludeReasoning and IncludeSymbolGraph now default to true; docs reflected the old false defaults, which would mislead users who expect opt-in behavior - AntiThrashWindow default raised from 3 to 10 to prevent a single productive compaction from prematurely resetting the guard on long sessions - MaxReplayChars and ToolResultCharOverrides are new ContextWindow fields; omitting them from the reference left the per-agent replay-truncation and per-tool cap-override knobs undiscoverable - Security sandbox table was missing the extended write functions (patch_file, create_directory, etc.) 
whose sandbox enforcement was just fixed to fire even when no FileSystemPermissions globs are configured --- docs/configuration.md | 9 ++++--- docs/context-management.md | 54 +++++++++++++++++++++++++------------- docs/security.md | 13 +++++---- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 9b36f0c..cd09baf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -309,6 +309,9 @@ Filters are applied in order: `TextOnly` / `ExcludeAgents` first, then `MaxTurnA | `MaxTurnAge` | int | `0` | Keep only messages from the last N agent turns (each turn ends at an assistant reply). Applied after `TextOnly`/`ExcludeAgents` and before `MaxTailMessages`. Semantic alternative to a raw message count — discards entire early-session phases rather than an arbitrary number of messages. `0` means no limit. | | `MaxTailMessages` | int | `0` | After the above filters, keep only the last N messages. `0` means no limit. | | `ContextCapFraction` | double | `0.0` | Soft-cap threshold expressed as a fraction of `MaxTailMessages` (e.g. `0.8` = 80%). When the filtered count exceeds this threshold a `context_cap_warning` event is emitted. Does not change trim behavior — use `MaxTailMessages` to hard-cap. `0.0` disables the warning. | +| `MaxToolResultChars` | int | `0` | Truncate `FunctionResultContent` strings in the replayed history slice to this many characters. A suffix noting the omitted count is appended. `0` disables truncation. See [context-management — Tool-result truncation](context-management.md#tool-result-truncation-maxtoolresultchars). | +| `ToolResultCharOverrides` | object | `{}` | Per-tool-name character cap overrides. Keys are tool function names (case-insensitive); values are the character limit for that tool's results, overriding `MaxToolResultChars`. A value of `0` disables truncation for that tool. Only meaningful when `MaxToolResultChars` is also set. 
| +| `MaxReplayChars` | int | `0` | Truncate non-summary assistant messages in the replayed history to this many characters. `0` uses the global 2,000-character fallback. Compaction summaries are never truncated. | **`TextOnly: true`** is the primary lever for context reduction. A Reviewer that independently re-reads files and re-runs commands gains nothing from hundreds of tool results produced by the Developer — stripping them can reduce input tokens by 90%+ in typical sessions. @@ -721,11 +724,11 @@ Compaction: | `Model` | object | first agent's model | Model used for generating the summary (`llm` and `hybrid` modes only). | | `Mode` | string | `"llm"` | Compaction mode. See below. | | `TokenBudget` | int | `80000` | Estimated token budget for `window` mode. Oldest message pairs are dropped until the total estimated token count (characters ÷ 4) falls within this limit. Ignored by all other modes. | -| `IncludeReasoning` | bool | `false` | When `true`, reasoning excerpts from the compacted turns are prepended to the summary as a `[REASONING EXCERPTS]` block. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. Reads `reasoning` events from the session events log (`Events.Path`). Has no effect when `Events` is not configured. | -| `IncludeSymbolGraph` | bool | `false` | When `true`, a `[SYMBOL DEPENDENCY GRAPH]` block is prepended to the summary (before `[REASONING EXCERPTS]` when both are enabled). The block lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session, giving agents an explicit map of what symbols were in scope. Requires `EvidenceStore` and `ChangeTracking` to be configured. | +| `IncludeReasoning` | bool | `true` | Prepends a `[REASONING EXCERPTS]` block to the compaction summary. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. 
Reads `reasoning` events from the session events log (`Events.Path`). Omitted silently when `Events` is not configured or contains no reasoning events. Set to `false` to suppress. | +| `IncludeSymbolGraph` | bool | `true` | Prepends a `[SYMBOL DEPENDENCY GRAPH]` block to the summary (before `[REASONING EXCERPTS]` when both are enabled). Lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session. Omitted silently when no evidence store is wired or no symbol nodes are found. Requires `EvidenceStore` and `ChangeTracking` to be configured. Set to `false` to suppress. | | `MaxCharsPerHistoryMessage` | int | `8000` | Maximum characters to include from any single message when building the history text passed to the LLM summarizer. Messages that exceed this limit are truncated and annotated with a `[TRUNCATED]` marker; any tool calls recorded for that turn are appended as a compact one-line list so the summarizer still knows what happened. Set to `0` to disable truncation. | | `AntiThrashMinSavingsRatio` | float | `0.10` | Minimum savings ratio (0–1) a compaction must achieve to count as effective. If the last `AntiThrashWindow` compactions all saved less than this fraction of the conversation, `ShouldCompact` returns `false` until the history grows past the trigger again. Prevents repeated LLM calls that reduce size by less than 10%. Set to `0` to disable. | -| `AntiThrashWindow` | int | `3` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. | +| `AntiThrashWindow` | int | `10` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. | | `SummaryTemplate` | string | built-in | Custom Liquid-style template for the LLM summary prompt. 
Supports `{{$task}}`, `{{$turn_count}}`, `{{$change_log}}`, and `{{$history}}` substitutions. When omitted, the built-in structured template is used — see [Compaction summary template](#compaction-summary-template). | **Compaction modes** diff --git a/docs/context-management.md b/docs/context-management.md index fa3d380..9207274 100644 --- a/docs/context-management.md +++ b/docs/context-management.md @@ -165,6 +165,10 @@ Agents: MaxTailMessages: 40 # hard cap after the above filters ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages MaxToolResultChars: 8000 # truncate individual tool results in replayed history + ToolResultCharOverrides: # raise the cap for specific tools + search_content: 20000 + grep_file: 20000 + MaxReplayChars: 4000 # truncate verbose assistant messages in replayed history ``` ### TextOnly @@ -201,13 +205,22 @@ Hard cap applied after the other filters. When the filtered list still exceeds t the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning` event as an early signal before the hard cap is reached. -### Replay truncation +### Replay truncation (`MaxReplayChars`) Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input -tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to -2,000 characters when replaying them into the next turn's history. Compaction summaries are -never truncated. +tokens balloon. fuseraft truncates verbose non-summary assistant messages to 2,000 characters +by default when replaying them; set `MaxReplayChars` to override this cap per agent. +Compaction summaries are never truncated regardless of this setting. 
+ +```yaml +Agents: + - Name: Developer + ContextWindow: + MaxReplayChars: 4000 # truncate replayed assistant messages to 4 000 chars +``` + +Default: `0` (uses the global 2,000-character fallback). ### Tool-result truncation (`MaxToolResultChars`) @@ -225,9 +238,12 @@ Agents: - Name: Developer ContextWindow: MaxToolResultChars: 8000 # truncate tool results in replayed history to 8 000 chars + ToolResultCharOverrides: # per-tool overrides (search tools can afford a higher cap) + search_content: 20000 + grep_file: 20000 ``` -Default: `0` (no truncation). +Default: `0` (no truncation). `ToolResultCharOverrides` is only meaningful when `MaxToolResultChars` is also set; a value of `0` in the overrides map disables truncation for that specific tool entirely. **Consumed-read optimisation:** fuseraft distinguishes between `read_file` results that the agent has already acted on and those that are still load-bearing: @@ -396,23 +412,25 @@ Compaction: Two optional flags add structured context blocks before the LLM summary text. Both are prefixed in this order when both are enabled: symbol graph first, then reasoning excerpts. -**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's -thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why* -behind prior decisions matters as much as the *what*. Requires `Events` to be configured -(reasoning excerpts are read from the session events log). +**`IncludeReasoning`** (default `true`) — prepends a `[REASONING EXCERPTS]` block containing +the model's thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when +the *why* behind prior decisions matters as much as the *what*. Requires `Events` to be +configured (reasoning excerpts are read from the session events log). When the events log is +absent or contains no reasoning events the block is omitted silently. 
-**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every -`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during -the session. Gives agents an explicit map of what symbols were in scope during the compacted -turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. +**`IncludeSymbolGraph`** (default `true`) — prepends a `[SYMBOL DEPENDENCY GRAPH]` block +listing every `SymbolDefinition` and `SymbolReference` node in the evidence store for files +written during the session. Gives agents an explicit map of what symbols were in scope during +the compacted turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. When no +evidence store is wired the block is omitted silently. ```yaml Compaction: TriggerTurnCount: 40 KeepRecentTurns: 8 Mode: hybrid - IncludeReasoning: true - IncludeSymbolGraph: true + IncludeReasoning: true # default; set to false to suppress + IncludeSymbolGraph: true # default; set to false to suppress ``` ### History pre-pruning @@ -441,7 +459,7 @@ If repeated compactions save very little — for example, a conversation that is threshold but whose LLM summary is nearly as long as the history it replaced — fuseraft suppresses further compaction until the history grows meaningfully. -The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 3). If +The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 10). If every entry in that window is below `AntiThrashMinSavingsRatio` (default 10%), `ShouldCompact` returns `false`. The guard resets automatically as new turns extend the conversation past the trigger again. @@ -450,8 +468,8 @@ trigger again. 
Compaction: TriggerTurnCount: 20 KeepRecentTurns: 5 - AntiThrashMinSavingsRatio: 0.15 # suppress if saving less than 15% - AntiThrashWindow: 4 # look at last 4 compactions + AntiThrashMinSavingsRatio: 0.15 # suppress if saving less than 15% (default: 0.10) + AntiThrashWindow: 4 # look at last 4 compactions (default: 10) ``` Set either field to `0` to disable the guard entirely. diff --git a/docs/security.md b/docs/security.md index 6f4c8fd..ad90013 100644 --- a/docs/security.md +++ b/docs/security.md @@ -15,13 +15,12 @@ Security: ### What is checked -| Plugin | Argument | Check type | -|--------|----------|-----------| -| `FileSystem` | `path` | Hard deny if resolved path is outside sandbox | -| `FileSystem` | `directory` | Hard deny if resolved path is outside sandbox | -| `Shell` | `workingDirectory` | Hard deny if resolved path is outside sandbox | -| `Shell` | `command` | Best-effort scan for absolute paths escaping sandbox | -| `Shell` | `script` | Best-effort scan for absolute paths escaping sandbox | +| Plugin | Functions / Argument | Check type | +|--------|----------------------|-----------| +| `FileSystem` | `read_file`, `write_file`, `delete_file`, `list_files` — `path` / `directory` | Hard deny if resolved path is outside sandbox | +| `FileSystem` | `patch_file`, `create_directory`, `delete_directory`, `set_permissions`, `copy_file`, `move_file` | Hard deny if resolved path is outside sandbox (always enforced, regardless of whether `FileSystemPermissions` globs are configured) | +| `Shell` | `shell_run`, `shell_run_script` — `workingDirectory` | Hard deny if resolved path is outside sandbox | +| `Shell` | `shell_run`, `shell_run_script` — `command` / `script` | Best-effort scan for absolute paths escaping sandbox | ### Path resolution From 16af741ea9de65deae907f8e10cf1308b6e25dfb Mon Sep 17 00:00:00 2001 From: Scott Stauffer Date: Mon, 1 Jun 2026 21:39:18 -0500 Subject: [PATCH 7/7] fix(compaction): align window-mode trigger with trim estimation - 
ShouldCompact was summing Usage.TotalTokens (cumulative API call cost per turn, growing quadratically) while TrimToWindow used chars/4; the trigger could fire repeatedly while the trim found nothing to drop, producing a stuck compaction loop in window mode - Both now use chars/4, matching the TokenBudget calibration documented in CompactionConfig and sessions.md --- src/Orchestration/ConversationCompactor.cs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index c2ce79a..b9acd1a 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -51,19 +51,21 @@ resumptionNote is null ? null /// <summary> /// Returns true when <paramref name="messages"/> has reached or exceeded /// the configured trigger. In window mode the trigger is the estimated - /// token count vs <see cref="CompactionConfig.TokenBudget"/>; in all other - /// modes it is the assistant-turn count vs <see cref="CompactionConfig.TriggerTurnCount"/>. + /// token count (characters ÷ 4) vs <see cref="CompactionConfig.TokenBudget"/>, using + /// the same estimate as <see cref="TrimToWindow"/> so the two stay in sync; in all + /// other modes it is the assistant-turn count vs <see cref="CompactionConfig.TriggerTurnCount"/>. /// </summary> public bool ShouldCompact(IReadOnlyList messages) { if (IsWindowMode) { - // Prefer provider-reported token counts when available — they include reasoning - // tokens that TruncateIntermediateAssistantReasoning strips from Content, so - // the char-based estimate would undercount them. Fall back to chars/4 only for - // messages that have no Usage record (e.g. injected system messages). - var estimated = messages.Sum(m => - m.Usage is { } u ? u.TotalTokens : (m.Content?.Length ?? 0) / 4); + // Use the same chars/4 estimate as TrimToWindow so the trigger and the trim + // measure the same quantity. 
Usage.TotalTokens is the cumulative API call cost + // (InputTokens = full context at that turn, not just this message), so summing + // it across messages grows quadratically and diverges from the char-based budget + // that TokenBudget is calibrated against — causing the trigger to fire while + // TrimToWindow finds nothing to drop. + var estimated = messages.Sum(m => (m.Content?.Length ?? 0) / 4); if (estimated > config.TokenBudget) { logger.LogDebug( @@ -93,7 +95,9 @@ public bool ShouldCompact(IReadOnlyList messages) /// <summary> /// Drops the oldest user+assistant pairs from <paramref name="messages"/> until - /// the estimated token count is within <see cref="CompactionConfig.TokenBudget"/>. + /// the estimated token count (characters ÷ 4) is within <see cref="CompactionConfig.TokenBudget"/>. + /// Uses the same estimation as <see cref="ShouldCompact"/> so the trigger and the + /// trim always agree on when the budget is met. /// No LLM call is made; no summary message is injected. /// </summary> public IReadOnlyList TrimToWindow(IReadOnlyList messages)