diff --git a/docs/configuration.md b/docs/configuration.md index 9b36f0c..cd09baf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -309,6 +309,9 @@ Filters are applied in order: `TextOnly` / `ExcludeAgents` first, then `MaxTurnA | `MaxTurnAge` | int | `0` | Keep only messages from the last N agent turns (each turn ends at an assistant reply). Applied after `TextOnly`/`ExcludeAgents` and before `MaxTailMessages`. Semantic alternative to a raw message count — discards entire early-session phases rather than an arbitrary number of messages. `0` means no limit. | | `MaxTailMessages` | int | `0` | After the above filters, keep only the last N messages. `0` means no limit. | | `ContextCapFraction` | double | `0.0` | Soft-cap threshold expressed as a fraction of `MaxTailMessages` (e.g. `0.8` = 80%). When the filtered count exceeds this threshold a `context_cap_warning` event is emitted. Does not change trim behavior — use `MaxTailMessages` to hard-cap. `0.0` disables the warning. | +| `MaxToolResultChars` | int | `0` | Truncate `FunctionResultContent` strings in the replayed history slice to this many characters. A suffix noting the omitted count is appended. `0` disables truncation. See [context-management — Tool-result truncation](context-management.md#tool-result-truncation-maxtoolresultchars). | +| `ToolResultCharOverrides` | object | `{}` | Per-tool-name character cap overrides. Keys are tool function names (case-insensitive); values are the character limit for that tool's results, overriding `MaxToolResultChars`. A value of `0` disables truncation for that tool. Ignored unless `MaxToolResultChars` is greater than `0`. | +| `MaxReplayChars` | int | `0` | Truncate non-summary assistant messages in the replayed history to this many characters. `0` uses the global 2,000-character fallback. Compaction summaries are never truncated. | **`TextOnly: true`** is the primary lever for context reduction. 
A Reviewer that independently re-reads files and re-runs commands gains nothing from hundreds of tool results produced by the Developer — stripping them can reduce input tokens by 90%+ in typical sessions. @@ -721,11 +724,11 @@ Compaction: | `Model` | object | first agent's model | Model used for generating the summary (`llm` and `hybrid` modes only). | | `Mode` | string | `"llm"` | Compaction mode. See below. | | `TokenBudget` | int | `80000` | Estimated token budget for `window` mode. Oldest message pairs are dropped until the total estimated token count (characters ÷ 4) falls within this limit. Ignored by all other modes. | -| `IncludeReasoning` | bool | `false` | When `true`, reasoning excerpts from the compacted turns are prepended to the summary as a `[REASONING EXCERPTS]` block. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. Reads `reasoning` events from the session events log (`Events.Path`). Has no effect when `Events` is not configured. | -| `IncludeSymbolGraph` | bool | `false` | When `true`, a `[SYMBOL DEPENDENCY GRAPH]` block is prepended to the summary (before `[REASONING EXCERPTS]` when both are enabled). The block lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session, giving agents an explicit map of what symbols were in scope. Requires `EvidenceStore` and `ChangeTracking` to be configured. | +| `IncludeReasoning` | bool | `true` | Prepends a `[REASONING EXCERPTS]` block to the compaction summary. Each excerpt is truncated to ~500 tokens so agents resuming after compaction can see the WHY behind prior decisions. Reads `reasoning` events from the session events log (`Events.Path`). Omitted silently when `Events` is not configured or contains no reasoning events. Set to `false` to suppress. 
| +| `IncludeSymbolGraph` | bool | `true` | Prepends a `[SYMBOL DEPENDENCY GRAPH]` block to the summary (before `[REASONING EXCERPTS]` when both are enabled). Lists every `SymbolDefinition` and `SymbolReference` node in the evidence graph for files written during the session. Omitted silently when no evidence store is wired or no symbol nodes are found. Requires `EvidenceStore` and `ChangeTracking` to be configured. Set to `false` to suppress. | | `MaxCharsPerHistoryMessage` | int | `8000` | Maximum characters to include from any single message when building the history text passed to the LLM summarizer. Messages that exceed this limit are truncated and annotated with a `[TRUNCATED]` marker; any tool calls recorded for that turn are appended as a compact one-line list so the summarizer still knows what happened. Set to `0` to disable truncation. | | `AntiThrashMinSavingsRatio` | float | `0.10` | Minimum savings ratio (0–1) a compaction must achieve to count as effective. If the last `AntiThrashWindow` compactions all saved less than this fraction of the conversation, `ShouldCompact` returns `false` until the history grows past the trigger again. Prevents repeated LLM calls that reduce size by less than 10%. Set to `0` to disable. | -| `AntiThrashWindow` | int | `3` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. | +| `AntiThrashWindow` | int | `10` | Number of recent compaction outcomes to examine for the anti-thrash guard. The guard only suppresses compaction once this many outcomes have been recorded. Set to `0` to disable. | | `SummaryTemplate` | string | built-in | Custom Liquid-style template for the LLM summary prompt. Supports `{{$task}}`, `{{$turn_count}}`, `{{$change_log}}`, and `{{$history}}` substitutions. 
When omitted, the built-in structured template is used — see [Compaction summary template](#compaction-summary-template). | **Compaction modes** diff --git a/docs/context-management.md b/docs/context-management.md index fa3d380..9207274 100644 --- a/docs/context-management.md +++ b/docs/context-management.md @@ -165,6 +165,10 @@ Agents: MaxTailMessages: 40 # hard cap after the above filters ContextCapFraction: 0.8 # emit context_cap_warning when at 80% of MaxTailMessages MaxToolResultChars: 8000 # truncate individual tool results in replayed history + ToolResultCharOverrides: # raise the cap for specific tools + search_content: 20000 + grep_file: 20000 + MaxReplayChars: 4000 # truncate verbose assistant messages in replayed history ``` ### TextOnly @@ -201,13 +205,22 @@ Hard cap applied after the other filters. When the filtered list still exceeds t the oldest messages are dropped. Set `ContextCapFraction` to receive a `context_cap_warning` event as an early signal before the hard cap is reached. -### Replay truncation +### Replay truncation (`MaxReplayChars`) Agents sometimes produce verbose stream-of-consciousness output (3–5k tokens). When that text is replayed verbatim in every subsequent turn, compaction summaries grow each cycle and input -tokens balloon. fuseraft automatically truncates verbose non-summary assistant messages to -2,000 characters when replaying them into the next turn's history. Compaction summaries are -never truncated. +tokens balloon. fuseraft truncates verbose non-summary assistant messages to 2,000 characters +by default when replaying them; set `MaxReplayChars` to override this cap per agent. +Compaction summaries are never truncated regardless of this setting. + +```yaml +Agents: + - Name: Developer + ContextWindow: + MaxReplayChars: 4000 # truncate replayed assistant messages to 4 000 chars +``` + +Default: `0` (uses the global 2,000-character fallback). 
### Tool-result truncation (`MaxToolResultChars`) @@ -225,9 +238,12 @@ Agents: - Name: Developer ContextWindow: MaxToolResultChars: 8000 # truncate tool results in replayed history to 8 000 chars + ToolResultCharOverrides: # per-tool overrides (search tools can afford a higher cap) + search_content: 20000 + grep_file: 20000 ``` -Default: `0` (no truncation). +Default: `0` (no truncation). `ToolResultCharOverrides` is ignored unless `MaxToolResultChars` is greater than `0`; a value of `0` in the overrides map lifts the character cap for that specific tool. Overrides are not consulted for consumed `read_file` results, which are still shortened as described below. **Consumed-read optimisation:** fuseraft distinguishes between `read_file` results that the agent has already acted on and those that are still load-bearing: @@ -396,23 +412,25 @@ Compaction: Two optional flags add structured context blocks before the LLM summary text. Both are prefixed in this order when both are enabled: symbol graph first, then reasoning excerpts. -**`IncludeReasoning`** — prepends a `[REASONING EXCERPTS]` block containing the model's -thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when the *why* -behind prior decisions matters as much as the *what*. Requires `Events` to be configured -(reasoning excerpts are read from the session events log). +**`IncludeReasoning`** (default `true`) — prepends a `[REASONING EXCERPTS]` block containing +the model's thinking for each compacted turn (truncated to ~500 tokens per turn). Useful when +the *why* behind prior decisions matters as much as the *what*. Requires `Events` to be +configured (reasoning excerpts are read from the session events log). When the events log is +absent or contains no reasoning events the block is omitted silently. -**`IncludeSymbolGraph`** — prepends a `[SYMBOL DEPENDENCY GRAPH]` block listing every -`SymbolDefinition` and `SymbolReference` node in the evidence store for files written during -the session. Gives agents an explicit map of what symbols were in scope during the compacted -turns. 
Requires `EvidenceStore` and `ChangeTracking` to be configured. +**`IncludeSymbolGraph`** (default `true`) — prepends a `[SYMBOL DEPENDENCY GRAPH]` block +listing every `SymbolDefinition` and `SymbolReference` node in the evidence store for files +written during the session. Gives agents an explicit map of what symbols were in scope during +the compacted turns. Requires `EvidenceStore` and `ChangeTracking` to be configured. When no +evidence store is wired the block is omitted silently. ```yaml Compaction: TriggerTurnCount: 40 KeepRecentTurns: 8 Mode: hybrid - IncludeReasoning: true - IncludeSymbolGraph: true + IncludeReasoning: true # default; set to false to suppress + IncludeSymbolGraph: true # default; set to false to suppress ``` ### History pre-pruning @@ -441,7 +459,7 @@ If repeated compactions save very little — for example, a conversation that is threshold but whose LLM summary is nearly as long as the history it replaced — fuseraft suppresses further compaction until the history grows meaningfully. -The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 3). If +The guard tracks the savings ratio of the last `AntiThrashWindow` compactions (default 10). If every entry in that window is below `AntiThrashMinSavingsRatio` (default 10%), `ShouldCompact` returns `false`. The guard resets automatically as new turns extend the conversation past the trigger again. @@ -450,8 +468,8 @@ trigger again. Compaction: TriggerTurnCount: 20 KeepRecentTurns: 5 - AntiThrashMinSavingsRatio: 0.15 # suppress if saving less than 15% - AntiThrashWindow: 4 # look at last 4 compactions + AntiThrashMinSavingsRatio: 0.15 # suppress if saving less than 15% (default: 0.10) + AntiThrashWindow: 4 # look at last 4 compactions (default: 10) ``` Set either field to `0` to disable the guard entirely. 
diff --git a/docs/security.md b/docs/security.md index 6f4c8fd..ad90013 100644 --- a/docs/security.md +++ b/docs/security.md @@ -15,13 +15,12 @@ Security: ### What is checked -| Plugin | Argument | Check type | -|--------|----------|-----------| -| `FileSystem` | `path` | Hard deny if resolved path is outside sandbox | -| `FileSystem` | `directory` | Hard deny if resolved path is outside sandbox | -| `Shell` | `workingDirectory` | Hard deny if resolved path is outside sandbox | -| `Shell` | `command` | Best-effort scan for absolute paths escaping sandbox | -| `Shell` | `script` | Best-effort scan for absolute paths escaping sandbox | +| Plugin | Functions / Argument | Check type | +|--------|----------------------|-----------| +| `FileSystem` | `read_file`, `write_file`, `delete_file`, `list_files` — `path` / `directory` | Hard deny if resolved path is outside sandbox | +| `FileSystem` | `patch_file`, `create_directory`, `delete_directory`, `set_permissions`, `copy_file`, `move_file` | Hard deny if resolved path is outside sandbox (always enforced, regardless of whether `FileSystemPermissions` globs are configured) | +| `Shell` | `shell_run`, `shell_run_script` — `workingDirectory` | Hard deny if resolved path is outside sandbox | +| `Shell` | `shell_run`, `shell_run_script` — `command` / `script` | Best-effort scan for absolute paths escaping sandbox | ### Path resolution diff --git a/src/Cli/OrchestratorBuilder.cs b/src/Cli/OrchestratorBuilder.cs index ee3c345..e164e9f 100644 --- a/src/Cli/OrchestratorBuilder.cs +++ b/src/Cli/OrchestratorBuilder.cs @@ -713,6 +713,15 @@ t.Pattern is not null || chatClientFactory.Create(summaryModel), compactionConfig, loggerFactory.CreateLogger(), resumptionNote, changeLogPath, intentLog, config.Events?.Path, evidenceStore); + + if ((compactionConfig.Mode ?? 
string.Empty).Equals("intent", StringComparison.OrdinalIgnoreCase) + && intentLog is null) + { + loggerFactory.CreateLogger(nameof(OrchestratorBuilder)).LogWarning( + "Compaction.Mode is 'intent' but no ChangeTracking.IntentLogPath is configured — " + + "compaction will fall back to lossless or LLM mode at runtime. " + + "Set ChangeTracking.IntentLogPath to enable deterministic intent compaction."); + } } // Build the post-session skill curator when curation is enabled. diff --git a/src/Core/Models/CompactionConfig.cs b/src/Core/Models/CompactionConfig.cs index 508a791..d6e4a96 100644 --- a/src/Core/Models/CompactionConfig.cs +++ b/src/Core/Models/CompactionConfig.cs @@ -67,19 +67,21 @@ public record CompactionConfig /// When true, reasoning excerpts from the compacted turn range are prepended to /// the compaction summary. Each excerpt is truncated to approximately 500 tokens so agents /// resuming after compaction can see the WHY behind prior decisions, not just the artifacts. - /// Reads reasoning events from the session's events log. Default: false. + /// Reads reasoning events from the session's events log. When the events log is + /// absent or contains no reasoning events the block is omitted silently. + /// Default: true. /// - public bool IncludeReasoning { get; init; } = false; + public bool IncludeReasoning { get; init; } = true; /// /// When true, a symbol dependency graph derived from the session's changed files is /// prepended to the compaction summary (before reasoning excerpts when both are enabled). /// Queries SymbolDefinition and SymbolReference nodes from the evidence store /// for every file written during the session, giving agents an explicit map of what symbols - /// were in scope across the compacted turns. Requires an active EvidenceStore. - /// Default: false. + /// were in scope across the compacted turns. When no evidence store is wired or no symbol + /// nodes are found the block is omitted silently. Default: true. 
/// - public bool IncludeSymbolGraph { get; init; } = false; + public bool IncludeSymbolGraph { get; init; } = true; /// /// Optional custom prompt template for LLM-mode compaction. When set, replaces the @@ -111,7 +113,7 @@ public record CompactionConfig /// /// Number of recent compaction outcomes to examine for the anti-thrash guard. /// Only suppresses compaction once this many outcomes have been recorded. - /// Default: 3. Set to 0 to disable the anti-thrash check. + /// Default: 10. Set to 0 to disable the anti-thrash check. /// - public int AntiThrashWindow { get; init; } = 3; + public int AntiThrashWindow { get; init; } = 10; } diff --git a/src/Core/Models/ContextWindowConfig.cs b/src/Core/Models/ContextWindowConfig.cs index e6154ca..dcb5874 100644 --- a/src/Core/Models/ContextWindowConfig.cs +++ b/src/Core/Models/ContextWindowConfig.cs @@ -105,4 +105,38 @@ public sealed record ContextWindowConfig /// Default: 0 (no truncation). /// public int MaxToolResultChars { get; init; } + + /// + /// Maximum characters to replay from a single non-summary assistant message in the + /// history slice passed to this agent. When an assistant message text exceeds this limit + /// the content is truncated and annotated with the omitted character count. + /// + /// + /// Agents sometimes produce multi-thousand-character reasoning blocks that are replayed + /// verbatim on every subsequent turn, compounding input-token growth. Compaction-summary + /// messages are never truncated regardless of this setting. + /// + /// + /// Default: 0 (uses the global 2,000-char fallback applied during session replay). + /// + public int MaxReplayChars { get; init; } + + /// + /// Per-tool-name character limit overrides applied during tool result truncation. + /// When a key matches a tool function name (case-insensitive), its value is used as the + /// character cap for that tool's results instead of <see cref="MaxToolResultChars"/>.
+ /// + /// + /// The primary use case is giving search and grep tools a higher limit than file-read + /// tools. For example: + /// + /// "ToolResultCharOverrides": { "search_content": 20000, "grep_file": 20000 } + /// + /// A value of 0 disables truncation for that tool entirely. + /// + /// + /// Only meaningful when <see cref="MaxToolResultChars"/> is greater than 0. + /// Default: empty (no overrides). + /// + public Dictionary ToolResultCharOverrides { get; init; } = []; } diff --git a/src/Infrastructure/AgentFactory.cs b/src/Infrastructure/AgentFactory.cs index 5a0da53..d3cde28 100644 --- a/src/Infrastructure/AgentFactory.cs +++ b/src/Infrastructure/AgentFactory.cs @@ -171,7 +171,12 @@ public AIAgent Create(AgentConfig config, Action? onToo // Deterministic sliding-window cap: always keep only the last N tool call/result // pairs in full, replacing older ones with placeholders unconditionally. // Applied before the budget-reactive trim so the window runs first. - var maxInTurnToolPairs = config.MaxInTurnToolPairs; + // When MaxContextTokens is set but no explicit pair limit is configured, default + // to 12 pairs to prevent O(N²) tool-result accumulation within a turn. + const int DefaultToolPairsWhenBudgeted = 12; + var maxInTurnToolPairs = config.MaxInTurnToolPairs > 0 + ? config.MaxInTurnToolPairs + : (resolvedModel.MaxContextTokens > 0 ? DefaultToolPairsWhenBudgeted : 0); // Tool schema overhead: computed once at build time since the tool list is fixed // for the lifetime of this agent. 
Included in the context budget and payload diff --git a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs index ef279a2..8da4ab1 100644 --- a/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs +++ b/src/Infrastructure/Plugins/SandboxEnforcementFilter.cs @@ -74,6 +74,15 @@ public sealed class SandboxEnforcementFilter private static readonly string[] FileSystemFunctions = ["read_file", "write_file", "delete_file", "list_files"]; + // Write-type extended functions that must always be routed through InspectFileSystem for + // sandbox boundary checks, even when no FileSystemPermissions glob matchers are configured. + // These functions create, modify, or remove paths and must stay within the sandbox root. + private static readonly HashSet SandboxedExtendedWriteFunctions = new(StringComparer.OrdinalIgnoreCase) + { + "patch_file", "create_directory", "delete_directory", "set_permissions", + "copy_file", "move_file", + }; + private static readonly string[] ShellFunctions = ["shell_run", "shell_run_script"]; @@ -205,11 +214,13 @@ public AIAgent WrapAgent(AIAgent agent) => var ringDenial = InspectRing(functionName); if (ringDenial is not null) return ringDenial; - // Core FS functions are always sandboxed; extended functions are routed when any glob - // matcher is configured so they get sandbox + deny/read/write checks. + // Core FS functions are always sandboxed; write-type extended functions are also + // always sandboxed (boundary check only). Other extended functions are routed when + // any glob matcher is configured so they get sandbox + deny/read/write checks. 
bool hasGlobMatcher = _fsDenyMatcher is not null || _fsReadMatcher is not null || _fsWriteMatcher is not null; bool isFsFunction = FileSystemFunctions.Any(f => string.Equals(f, functionName, StringComparison.OrdinalIgnoreCase)) + || SandboxedExtendedWriteFunctions.Contains(functionName) || (hasGlobMatcher && AllExtendedFsFunctions.Contains(functionName)); if (isFsFunction) diff --git a/src/Orchestration/AgentOrchestrator.cs b/src/Orchestration/AgentOrchestrator.cs index 9d639b1..46216c7 100644 --- a/src/Orchestration/AgentOrchestrator.cs +++ b/src/Orchestration/AgentOrchestrator.cs @@ -419,7 +419,11 @@ await eventEmitter.EmitAsync("turn_end", } // Select the next agent. + // Capture the history count before selection so correction messages injected by + // the strategy (ConflictingEvidence / NoProgress) can be identified afterwards. + int preSelectCount = history.Count; var agent = await selection.SelectAsync(agents, history, cancellationToken); + int postSelectCount = history.Count; if (agent is null) break; logger.LogDebug( @@ -445,7 +449,9 @@ await eventEmitter.EmitAsync("turn_end", // sees only what it needs rather than the full session transcript. The shared // history list is still updated after the turn so routing/termination strategies // continue to work normally. - // When no Context spec is set, fall back to the traditional ContextWindow filter. + // When no Context spec is set, fall back to the traditional ContextWindow filter + // and auto-inject the session context summary (context_summary.md) as the second + // message when it exists, preventing agents from wasting turns re-reading brief.json. var agentCfg = agentConfigs.GetValueOrDefault(agent.Name ?? 
""); IReadOnlyList filtered; if (agentCfg?.Context is { Count: > 0 } agentContextSources && contextAssembler is not null) @@ -459,7 +465,22 @@ await eventEmitter.EmitAsync("turn_end", } else { - filtered = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow); + var raw = ContextWindowFilter.Apply(history, agentCfg?.ContextWindow); + if (contextAssembler is not null) + { + var sessionCtx = await contextAssembler.ReadSessionContextAsync(cancellationToken); + if (sessionCtx is not null) + { + var withCtx = new List(raw.Count + 1); + if (raw.Count > 0) withCtx.Add(raw[0]); + withCtx.Add(new ChatMessage(ChatRole.User, + $"[Session Context]\n\n{sessionCtx.Trim()}")); + withCtx.AddRange(raw.Skip(1)); + filtered = withCtx; + } + else filtered = raw; + } + else filtered = raw; } IEnumerable context = (hasInstructions || memoryManager is not null) && instructions is not null @@ -475,6 +496,22 @@ await eventEmitter.EmitAsync("turn_end", history.Count, filtered.Count); + // Pre-turn budget guard: estimate the input token cost of this context slice and + // abort before the LLM call if cumulative + estimated input would exceed the limit. + // Prevents the one-turn overshoot that occurs when the post-yield check fires too + // late (e.g. a file-read turn that consumes tens of thousands of tokens). + if (config.MaxTotalTokens is { } preTurnLimit) + { + var estimatedInputTokens = EstimateContextTokens(context); + if (cumulativeTokens + estimatedInputTokens > preTurnLimit) + { + logger.LogWarning( + "[Orchestrator] Pre-turn budget guard: cumulative {Cumulative:N0} + estimated input {Estimated:N0} > limit {Limit:N0} — aborting before turn.", + cumulativeTokens, estimatedInputTokens, preTurnLimit); + throw new BudgetExceededException(cumulativeTokens + estimatedInputTokens, preTurnLimit); + } + } + AgentResponse response = governanceKernel?.CircuitBreaker is { } cb ? 
await cb.ExecuteAsync(() => agent.RunAsync(context, null, null, cancellationToken)) : await agent.RunAsync(context, null, null, cancellationToken); @@ -581,13 +618,16 @@ await eventEmitter.EmitAsync("reasoning", if (memoryManager is not null) await memoryManager.PostTurnAsync(agentMessage.AgentName, [..history], cancellationToken); - // Periodic verifier: run the meta-agent every N turns to audit evidence. - // Skipped when the verifier itself just ran to prevent self-loops. - if (config.Verifier is { EveryNTurns: > 0 } verCfg + // Periodic verifier: run the meta-agent every N turns to audit evidence, OR + // immediately when a ConflictingEvidence / NoProgress correction was injected this + // turn (evidence-driven trigger). Skipped when the verifier itself just ran. + if (config.Verifier is { } verCfg && verifierAgent is not null - && agentMessage.TurnIndex > 0 - && agentMessage.TurnIndex % verCfg.EveryNTurns == 0 - && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase)) + && !string.Equals(agentMessage.AgentName, verCfg.AgentName, StringComparison.OrdinalIgnoreCase) + && ( + (verCfg.EveryNTurns > 0 && agentMessage.TurnIndex > 0 && agentMessage.TurnIndex % verCfg.EveryNTurns == 0) + || (verCfg.TriggerOnSuspiciousTransition && HasSuspiciousTransitionSignal(history, preSelectCount, postSelectCount)) + )) { AgentStarting?.Invoke(verifierAgent.Name ?? "Verifier"); agentFactory.OnAgentTurnStarting(); @@ -769,4 +809,42 @@ private static void WireDidResolver(ITerminationCondition condition, Func Guid.NewGuid().ToString("N")[..8]; + + // Scans messages at indices [from, to) for ConflictingEvidence or NoProgress correction + // signals injected by the selection strategy. Returns true when any such signal is found, + // indicating the verifier should audit the current turn's output. 
+ private static bool HasSuspiciousTransitionSignal(IList history, int from, int to) + { + for (int i = from; i < to && i < history.Count; i++) + { + var msg = history[i]; + if (msg.Role != ChatRole.User) continue; + var text = msg.Text ?? string.Empty; + if (text.StartsWith("NO TOOL CALLS", StringComparison.Ordinal) || + text.StartsWith("CRITICAL:", StringComparison.Ordinal) || + text.Contains("EVIDENCE INCONSISTENCY", StringComparison.Ordinal) || + text.Contains("EVIDENCE AUDIT REQUIRED", StringComparison.Ordinal)) + return true; + } + return false; + } + + // Estimates the input token cost of a context slice by summing all content chars across + // message types and dividing by 4. Used for the pre-turn budget guard; intentionally + // conservative (actual tokenisation may differ but is rarely smaller than chars/4). + private static int EstimateContextTokens(IEnumerable messages) + { + int chars = 0; + foreach (var msg in messages) + foreach (var content in msg.Contents) + chars += content switch + { + TextContent tc => tc.Text?.Length ?? 0, + FunctionCallContent fc => (fc.Name?.Length ?? 0) + + (fc.Arguments?.Values.Sum(v => v?.ToString()?.Length ?? 0) ?? 0), + FunctionResultContent fr => fr.Result?.ToString()?.Length ?? 0, + _ => 0, + }; + return chars / 4; + } } diff --git a/src/Orchestration/ContextWindowFilter.cs b/src/Orchestration/ContextWindowFilter.cs index 4809188..cdc8cfd 100644 --- a/src/Orchestration/ContextWindowFilter.cs +++ b/src/Orchestration/ContextWindowFilter.cs @@ -118,8 +118,37 @@ public static IReadOnlyList Apply( } // Step 4: Tail limit — keep only the last N messages. + // Correction messages (RETRY, STAGNATION, [fuseraft:blocked, etc.) are pinned so they + // always survive the position-based cut. Non-correction messages are trimmed to the tail + // window; the final list preserves original message order. 
if (window.MaxTailMessages > 0 && list.Count > window.MaxTailMessages) - list = list.Skip(list.Count - window.MaxTailMessages).ToList(); + { + var pinnedSet = new HashSet( + Enumerable.Range(0, list.Count).Where(i => IsCorrectionMessage(list[i]))); + + if (pinnedSet.Count == 0) + { + list = list.Skip(list.Count - window.MaxTailMessages).ToList(); + } + else + { + var unpinnedIndices = Enumerable.Range(0, list.Count) + .Where(i => !pinnedSet.Contains(i)) + .ToList(); + + int firstKeptUnpinned = unpinnedIndices.Count > window.MaxTailMessages + ? unpinnedIndices[unpinnedIndices.Count - window.MaxTailMessages] + : 0; + + var kept = new List(list.Count); + for (int i = 0; i < list.Count; i++) + { + if (i >= firstKeptUnpinned || pinnedSet.Contains(i)) + kept.Add(list[i]); + } + list = kept; + } + } // Step 5: Sanitize tool_use/tool_result pairing at slice boundaries. // Steps 3 and 4 cut by position; either cut can land inside a tool-call/result @@ -135,17 +164,59 @@ public static IReadOnlyList Apply( // When MaxToolResultChars is set, any FunctionResultContent string that exceeds // the limit is truncated and annotated with the omitted character count. if (window.MaxToolResultChars > 0) - list = TruncateToolResults(list, window.MaxToolResultChars); + list = TruncateToolResults(list, window.MaxToolResultChars, window.ToolResultCharOverrides); + + // Step 7: Truncate verbose assistant messages. + // When MaxReplayChars is set, assistant text content that exceeds the limit is + // truncated. Compaction-summary messages (marked by their header prefix) are exempt. 
+ if (window.MaxReplayChars > 0) + list = TruncateAssistantContent(list, window.MaxReplayChars); return list; } + private static List TruncateAssistantContent(List list, int maxChars) + { + var result = new List(list.Count); + foreach (var msg in list) + { + if (msg.Role != ChatRole.Assistant) + { + result.Add(msg); + continue; + } + + var textContent = string.Concat(msg.Contents.OfType().Select(t => t.Text)); + // Compaction summaries are already compact — skip them unconditionally. + if (textContent.StartsWith("[CONVERSATION SUMMARY", StringComparison.Ordinal) || + textContent.Length <= maxChars) + { + result.Add(msg); + continue; + } + + var truncated = textContent[..maxChars] + + $"\n[...truncated — {textContent.Length - maxChars:N0} chars omitted to reduce context size...]"; + + var newContents = msg.Contents + .Where(c => c is not TextContent) + .Prepend(new TextContent(truncated)) + .ToList(); + + result.Add(new ChatMessage(ChatRole.Assistant, newContents) { AuthorName = msg.AuthorName }); + } + return result; + } + // How much of a consumed read_file result to keep for structural context (file shape, // imports, class header) after a downstream write/patch confirms the content was acted on. // The rest is elided — the model's mental model of the file is stale at that point anyway. private const int ConsumedReadCapChars = 500; - private static List TruncateToolResults(List list, int maxChars) + private static List TruncateToolResults( + List list, + int maxChars, + IReadOnlyDictionary? overrides = null) { // Fast path: no ChatRole.Tool messages in the slice. if (!list.Any(m => m.Role == ChatRole.Tool)) return list; @@ -155,6 +226,16 @@ private static List TruncateToolResults(List list, int // the model hasn't yet acted on are left at the normal maxChars limit. var consumedReadIds = BuildConsumedReadCallIds(list); + // Build callId → toolName so per-tool overrides can be resolved for each result. 
+ var callToolNames = new Dictionary(StringComparer.Ordinal); + foreach (var msg in list) + { + if (msg.Role != ChatRole.Assistant) continue; + foreach (var c in msg.Contents) + if (c is FunctionCallContent fc && fc.CallId is not null) + callToolNames[fc.CallId] = fc.Name ?? string.Empty; + } + var result = new List(list.Count); foreach (var msg in list) { @@ -182,10 +263,29 @@ private static List TruncateToolResults(List list, int $"file was written or patched later this session; " + $"call read_file again if current content is needed]"; } - else if (s.Length > maxChars) + else { - truncated = s[..maxChars] + - $"\n[...truncated — {s.Length - maxChars:N0} chars omitted to reduce context size...]"; + // Resolve the per-tool limit: check overrides first, then fall back to maxChars. + // A zero override value disables truncation for that tool entirely. + int limit = maxChars; + if (overrides is { Count: > 0 } && + callToolNames.TryGetValue(fr.CallId ?? string.Empty, out var toolName)) + { + foreach (var kv in overrides) + { + if (string.Equals(kv.Key, toolName, StringComparison.OrdinalIgnoreCase)) + { + limit = kv.Value; + break; + } + } + } + + if (limit > 0 && s.Length > limit) + { + truncated = s[..limit] + + $"\n[...truncated — {s.Length - limit:N0} chars omitted to reduce context size...]"; + } } if (truncated is not null) @@ -342,28 +442,68 @@ private static List SanitizeToolPairs(List list) return result; } - // Maximum number of characters to replay from a single non-summary assistant message. + // Prefixes that unambiguously identify a ChatRole.User correction injected by + // CorrectionEngine, routing strategies, or the orchestrator's verifier hook. 
+ private static readonly string[] CorrectionPrefixes = + [ + "RETRY ", + "NO TOOL CALLS", + "CRITICAL:", + "APPROVED rejected:", + "WRONG KEYWORD:", + "JSON block correct", + "BUILD FAILURE:", + "STAGNATION (", + "STUCK ", + "HALLUCINATION:", + "PERSISTENT BUILD FAILURE", + "VERIFICATION FINDING", + "Files written this turn", + "No handoff keyword", + "EVIDENCE INCONSISTENCY", // ConflictingEvidence (KeywordSelectionStrategy) + "EVIDENCE AUDIT REQUIRED", // ConflictingEvidence (StateMachineSelectionStrategy) + "MISSING ARTIFACT", // MissingEvidence (both strategies) + ]; + + /// + /// Returns true when is a correction injected by + /// , a routing strategy, + /// or the orchestrator's verifier hook. Used to pin corrections so they survive + /// trimming, and to re-inject them + /// into assembled agent contexts. + /// + public static bool IsCorrectionMessage(ChatMessage message) + { + if (message.Role != ChatRole.User) return false; + var text = message.Text ?? string.Empty; + if (text.Contains("[fuseraft:blocked", StringComparison.Ordinal)) return true; + foreach (var prefix in CorrectionPrefixes) + if (text.StartsWith(prefix, StringComparison.Ordinal)) return true; + return false; + } + + // Global default applied during checkpoint-resume replay when no per-agent limit is set. // Agents sometimes produce verbose stream-of-consciousness reasoning text (3–5k output // tokens). When that text is replayed verbatim in every subsequent turn it causes // compaction summaries to grow each cycle and in-turn input tokens to balloon (450k+). // Compaction summaries (IsCompactionSummary) are already compact and are never truncated. - private const int MaxReplayChars = 2_000; + internal const int DefaultMaxReplayChars = 2_000; /// /// Returns the content string to replay for into the next /// StreamAsync call's history. Verbose non-summary assistant messages are - /// truncated at to prevent compounding context growth. 
+ /// truncated at to prevent compounding context growth. /// - public static string TruncateReplayContent(AgentMessage message) + public static string TruncateReplayContent(AgentMessage message, int maxReplayChars = DefaultMaxReplayChars) { var content = message.Content ?? string.Empty; if (message.IsCompactionSummary || message.Role != "assistant" - || content.Length <= MaxReplayChars) + || content.Length <= maxReplayChars) return content; - return content[..MaxReplayChars] + - $"\n[...truncated — {content.Length - MaxReplayChars:N0} chars omitted to reduce context size...]"; + return content[..maxReplayChars] + + $"\n[...truncated — {content.Length - maxReplayChars:N0} chars omitted to reduce context size...]"; } } diff --git a/src/Orchestration/ConversationCompactor.cs b/src/Orchestration/ConversationCompactor.cs index 8402c15..b9acd1a 100644 --- a/src/Orchestration/ConversationCompactor.cs +++ b/src/Orchestration/ConversationCompactor.cs @@ -51,13 +51,20 @@ resumptionNote is null ? null /// /// Returns true when has reached or exceeded /// the configured trigger. In window mode the trigger is the estimated - /// token count vs ; in all other - /// modes it is the assistant-turn count vs . + /// token count (characters ÷ 4) vs , using + /// the same estimate as so the two stay in sync; in all + /// other modes it is the assistant-turn count vs . /// public bool ShouldCompact(IReadOnlyList messages) { if (IsWindowMode) { + // Use the same chars/4 estimate as TrimToWindow so the trigger and the trim + // measure the same quantity. Usage.TotalTokens is the cumulative API call cost + // (InputTokens = full context at that turn, not just this message), so summing + // it across messages grows quadratically and diverges from the char-based budget + // that TokenBudget is calibrated against — causing the trigger to fire while + // TrimToWindow finds nothing to drop. var estimated = messages.Sum(m => (m.Content?.Length ?? 
0) / 4); if (estimated > config.TokenBudget) { @@ -88,7 +95,9 @@ public bool ShouldCompact(IReadOnlyList messages) /// /// Drops the oldest user+assistant pairs from until - /// the estimated token count is within . + /// the estimated token count (characters ÷ 4) is within . + /// Uses the same estimation as so the trigger and the + /// trim always agree on when the budget is met. /// No LLM call is made; no summary message is injected. /// public IReadOnlyList TrimToWindow(IReadOnlyList messages) @@ -154,6 +163,9 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess var prefixBlock = CombineBlocks(symbolBlock, reasoningBlock); // Intent mode: reconstruct from the intent log — fully deterministic, no LLM call. + // When the intent log is unavailable, record a visible fallback notice so agents + // resuming after compaction know the summary was degraded. + string? intentFallbackNotice = null; if (mode == "intent") { if (intentLog is not null) @@ -162,6 +174,11 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess toCompact[0].TurnIndex, toCompact[^1].TurnIndex, cancellationToken); var intentSummary = BuildIntentDerivedSummary( toCompact[0].TurnIndex, toCompact[^1].TurnIndex, intents, prefixBlock); + intentSummary = intentSummary with + { + Usage = AccumulateCompactedUsage(toCompact, null), + ToolCalls = AccumulateCompactedToolCalls(toCompact), + }; logger.LogInformation( "Intent compaction: {Compacted} turns replaced by intent log reconstruction ({IntentCount} intents).", toCompact.Count, intents.Count); @@ -169,7 +186,12 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess } logger.LogWarning( - "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm."); + "Compaction mode is 'intent' but no intent log is available — falling back to lossless/llm. 
" + + "Configure ChangeTracking.IntentLogPath to enable deterministic intent compaction."); + intentFallbackNotice = + "[COMPACTION WARNING: 'intent' mode was requested but no intent log is wired — " + + "this summary was generated using fallback compaction (lossless or LLM). " + + "Configure ChangeTracking.IntentLogPath to suppress this warning.]"; // Fall through to lossless / llm. } @@ -185,10 +207,15 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess }; if (ExpandedNote is not null) reconstructed = reconstructed with { Content = reconstructed.Content + "\n\n---\n" + ExpandedNote }; + reconstructed = reconstructed with + { + Usage = AccumulateCompactedUsage(toCompact, null), + ToolCalls = AccumulateCompactedToolCalls(toCompact), + }; logger.LogInformation( "Lossless compaction: {Compacted} turns replaced by evidence reconstruction.", toCompact.Count); - return (reconstructed, toRetain); + return (PrependFallbackNotice(reconstructed, intentFallbackNotice), toRetain); } // Hybrid: prepend reconstruction before the LLM summary. @@ -215,9 +242,8 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = true, - Usage = summUsage is not null - ? new TokenUsage(summUsage.InputTokens, summUsage.OutputTokens) - : null + Usage = AccumulateCompactedUsage(toCompact, summUsage), + ToolCalls = AccumulateCompactedToolCalls(toCompact), }; logger.LogInformation( @@ -231,7 +257,7 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess // LLM summary failed; return the lossless reconstruction alone so the session survives. 
logger.LogError(ex, "Hybrid compaction: LLM summary call failed — returning lossless reconstruction only."); - return (reconstructed, toRetain); + return (reconstructed with { Usage = AccumulateCompactedUsage(toCompact, null) }, toRetain); } } @@ -256,16 +282,15 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess Role = "user", TurnIndex = toCompact[^1].TurnIndex, IsCompactionSummary = true, - Usage = summaryUsage is not null - ? new TokenUsage(summaryUsage.InputTokens, summaryUsage.OutputTokens) - : null + Usage = AccumulateCompactedUsage(toCompact, summaryUsage), + ToolCalls = AccumulateCompactedToolCalls(toCompact), }; logger.LogInformation( "Compaction complete. Turns 0–{Last} replaced by summary.", toCompact[^1].TurnIndex); - return (summary, toRetain); + return (PrependFallbackNotice(summary, intentFallbackNotice), toRetain); } catch (OperationCanceledException) { throw; } catch (Exception ex) @@ -273,12 +298,52 @@ public IReadOnlyList TrimToWindow(IReadOnlyList mess logger.LogError(ex, "LLM compaction failed; inserting fallback marker for turns {First}–{Last}.", toCompact[0].TurnIndex, toCompact[^1].TurnIndex); - return (BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message), toRetain); + var fallback = BuildFallbackSummary(toCompact[0].TurnIndex, toCompact[^1].TurnIndex, ex.Message) + with { ToolCalls = AccumulateCompactedToolCalls(toCompact) }; + return (PrependFallbackNotice(fallback, intentFallbackNotice), toRetain); } } // Internals + // Collects all ToolCallRecord entries from the compacted turns into a flat list so the + // summary message preserves them. Downstream consumers (telemetry, BuildModifiedFilesNote) + // inspect ToolCalls on AgentMessages; without this they silently drop records for any turn + // that was compacted, producing incomplete data for succeeded/failed tool tracking. + private static IReadOnlyList? AccumulateCompactedToolCalls( + IReadOnlyList compacted) + { + List? 
all = null; + foreach (var m in compacted) + { + if (m.ToolCalls is not { Count: > 0 }) continue; + all ??= []; + all.AddRange(m.ToolCalls); + } + return all; + } + + // Sums the token costs of all compacted turns and folds in the summary-call cost. + // The total is stored on the summary AgentMessage so AgentOrchestrator can seed + // cumulativeTokens correctly on the next StreamAsync call (after resume/compaction), + // keeping MaxTotalTokens enforcement accurate across compaction boundaries. + private static TokenUsage? AccumulateCompactedUsage( + IReadOnlyList compacted, + TokenUsage? summaryCallUsage) + { + int totalInput = summaryCallUsage?.InputTokens ?? 0; + int totalOutput = summaryCallUsage?.OutputTokens ?? 0; + foreach (var m in compacted) + { + if (m.Usage is null) continue; + totalInput += m.Usage.InputTokens; + totalOutput += m.Usage.OutputTokens; + } + return (totalInput > 0 || totalOutput > 0) + ? new TokenUsage(totalInput, totalOutput) + : null; + } + private AgentMessage BuildIntentDerivedSummary( int firstTurn, int lastTurn, @@ -476,6 +541,9 @@ private void RecordSavings(double ratio) _recentSavings.Dequeue(); } + private static AgentMessage PrependFallbackNotice(AgentMessage msg, string? notice) => + notice is null ? 
msg : msg with { Content = notice + "\n\n" + msg.Content }; + private AgentMessage BuildFallbackSummary(int firstTurn, int lastTurn, string errorMessage) { var content = diff --git a/src/Orchestration/GraphOrchestrator.cs b/src/Orchestration/GraphOrchestrator.cs index 028a5c3..abd5a8c 100644 --- a/src/Orchestration/GraphOrchestrator.cs +++ b/src/Orchestration/GraphOrchestrator.cs @@ -1165,7 +1165,8 @@ await eventEmitter.EmitAsync("no_keyword", int histBefore2 = ctx.History.Count; await CorrectionEngine.InjectNoKeywordCorrection( - ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter); + ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter, + agentMsg.ToolCalls); await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false); if (consecutiveFails >= maxRetries) @@ -1623,7 +1624,8 @@ await eventEmitter.EmitAsync("no_keyword", int histBefore2 = ctx.History.Count; await CorrectionEngine.InjectNoKeywordCorrection( - ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter); + ctx.History, responseText, agentName, consecutiveFails, routeTable, eventEmitter, + agentMsg.ToolCalls); await PersistCorrectionsAsync(ctx, histBefore2, ct).ConfigureAwait(false); if (consecutiveFails >= maxRetries) diff --git a/src/Orchestration/HandoffContextResolver.cs b/src/Orchestration/HandoffContextResolver.cs index d5c5602..b730bf9 100644 --- a/src/Orchestration/HandoffContextResolver.cs +++ b/src/Orchestration/HandoffContextResolver.cs @@ -54,6 +54,14 @@ public ContextAssembler( public void SetSessionId(string sessionId) => _sessionId = sessionId; + /// + /// Returns the current session context summary, or null when the file does not + /// exist or is empty. Used by orchestrators to auto-inject context for agents that do not + /// declare an explicit Context spec. 
+ /// + public Task ReadSessionContextAsync(CancellationToken ct = default) + => ResolveSessionContextAsync(ct); + // ── Handoff injection (state machine transitions) ──────────────────────── /// @@ -167,6 +175,14 @@ public async Task> AssembleForAgentAsync( } } + // 4. Pending corrections — user correction messages injected into shared history after + // this agent's last turn. Context-spec agents replace shared-history replay entirely, + // so corrections written to shared history (by CorrectionEngine, routing strategies, + // or the verifier hook) would otherwise be invisible on the next invocation. Re-inject + // them here so the agent always sees the most recent feedback addressed to it. + var pendingCorrections = ExtractPendingCorrections(agentName, sharedHistory); + result.AddRange(pendingCorrections); + return result; } @@ -314,6 +330,35 @@ private static IReadOnlyList ExtractOwnHistory( return ownTurns.Select(t => t.Msg).ToList(); } + // ── Pending-correction extraction ─────────────────────────────────────── + + // Returns all correction messages in shared history that appear after the last + // assistant turn by agentName. These are unread corrections the agent has not yet + // acted on; they must be included in the assembled context so the agent sees them. + private static IReadOnlyList ExtractPendingCorrections( + string agentName, + IList history) + { + int lastOwnIdx = -1; + for (int i = history.Count - 1; i >= 0; i--) + { + if (history[i].Role == ChatRole.Assistant && + string.Equals(history[i].AuthorName, agentName, StringComparison.OrdinalIgnoreCase)) + { + lastOwnIdx = i; + break; + } + } + + var corrections = new List(); + for (int i = lastOwnIdx + 1; i < history.Count; i++) + { + if (ContextWindowFilter.IsCorrectionMessage(history[i])) + corrections.Add(history[i]); + } + return corrections; + } + // ── Helpers ────────────────────────────────────────────────────────────── private static (string Type, string? 
Param) ParseSource(string source) diff --git a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs index 9c67e3b..122ef71 100644 --- a/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs +++ b/src/Orchestration/Strategies/StateMachineSelectionStrategy.cs @@ -772,21 +772,36 @@ private static bool IsSignalOnOwnLine(string content, string signal) return false; } - // Returns true when a turn-boundary marker already exists after keywordIndex for - // the target state's agent — meaning this signal was consumed in a prior turn. + // Returns true when this specific transition was already consumed after signalIndex. + // + // Two marker types are checked: + // "[fuseraft:blocked {state}→{targetState}]" — the transition was evaluated and + // its contract failed; the signal must not be re-evaluated for that target. + // Markers for OTHER targets do not suppress this transition. + // Any other "[fuseraft: ...]" — a different transition fired, meaning the state + // machine already advanced; the signal is consumed regardless of target. private static bool TransitionAlreadyFired(IList history, int signalIndex, string targetState) { - // We look for "[fuseraft: X → Y]" markers after the signal message. - // Since we don't know the target agent name from here (only the target state), - // we use a simplified check: any turn-boundary marker after this index means - // the selector already processed this turn. for (int j = signalIndex + 1; j < history.Count; j++) { var m = history[j]; if (m.Role != ChatRole.User) continue; var text = m.Text; - if (!string.IsNullOrEmpty(text) && text.StartsWith("[fuseraft:", StringComparison.Ordinal)) - return true; + if (string.IsNullOrEmpty(text)) continue; + if (!text.StartsWith("[fuseraft:", StringComparison.Ordinal)) continue; + + // Blocking markers suppress only the transition they name. + // "[fuseraft:blocked A→B]" blocks A→B but must not block A→C. 
+ if (text.StartsWith("[fuseraft:blocked ", StringComparison.Ordinal)) + { + if (text.Contains($"→{targetState}", StringComparison.OrdinalIgnoreCase)) + return true; + continue; // Different target — does not apply to this transition. + } + + // Any non-blocking marker means the state machine already acted on a signal + // in this lookback window (transition fired or parallel dispatched). + return true; } return false; } diff --git a/src/Orchestration/Workflow/CorrectionEngine.cs b/src/Orchestration/Workflow/CorrectionEngine.cs index 2291fe6..61ac8ff 100644 --- a/src/Orchestration/Workflow/CorrectionEngine.cs +++ b/src/Orchestration/Workflow/CorrectionEngine.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.AI; using fuseraft.Core; +using fuseraft.Core.Models; namespace fuseraft.Orchestration.Workflow; @@ -43,7 +44,8 @@ internal static async Task InjectNoKeywordCorrection( string agentName, int consecutiveCount, AgentRouteTable routeTable, - EventEmitter? eventEmitter = null) + EventEmitter? eventEmitter = null, + IReadOnlyList? turnToolCalls = null) { var validKeywordList = BuildValidKeywordList(routeTable); bool isReviewerType = routeTable.PhaseBreakKeywords.Contains("APPROVED"); @@ -51,7 +53,10 @@ internal static async Task InjectNoKeywordCorrection( if (TryInjectForeignKeywordCorrection(history, responseText, routeTable, agentName, validKeywordList)) return; if (TryInjectCodeBlockCorrection(history, responseText, isReviewerType, validKeywordList)) return; - if (!CurrentTurnHasToolCalls(history)) + // Also treat as "has tool calls" when the AgentMessage records sub-agent tool calls + // that ran inside a SubAgentPlugin — those don't produce ChatRole.Tool entries in the + // outer history so CurrentTurnHasToolCalls would return false without this check. + if (!CurrentTurnHasToolCalls(history) && (turnToolCalls is null || turnToolCalls.Count == 0)) { InjectNoToolCallsCorrection(history, isReviewerType, validKeywordList); return;