diff --git a/CHANGELOG.md b/CHANGELOG.md index 7688486d..22c7a8b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Phase 6 — Study Buddy endpoint (SSE + run history) (2026-06-15) + +Phase 6 **AI-037, slice b** — the reader can now run the agent and watch it work. The panel UI is AI-038. + +- **`POST /me/books/{editionId}/studybuddy`** — authenticated; runs `StudyBuddyAgent` on a highlighted passage and streams its progress over SSE: a **`step`** event per recorded step (index / kind / payload), a **`done`** event with the final answer (+ iterations + cost), or a terminal **`error`** event when the agent fails or exhausts its budget. The run is **persisted** (AI-036) on completion with the right status — a budget-exhausted run keeps its partial transcript. `X-Accel-Buffering: no` for the Cloudflare tunnel; client disconnect propagates untraced; rate-limited (`studybuddy`, 8/min/IP — runs are several LLM calls each). +- **`GET /me/studybuddy/runs/{runId}`** — returns a persisted run (scoped to the user) with its step transcript parsed from jsonb, for the "show steps" view. +- Tests: `StreamRunAsync` over the real agent + loop + scripted LLM — direct answer → `step`→`done` + a persisted `completed` run; never-terminating model → partial `step`s → terminal `error` + a persisted `budget_exhausted` run that keeps its transcript. Live-API integration (skip-friendly): no-auth → 401, empty passage → 400, unknown run → 404. + ### Phase 6 — streaming agent loop (2026-06-14) Phase 6 **AI-037, slice a** — the loop streams its steps so the reader can watch the agent work. The SSE endpoint + run persistence + `GET` are slice b. 
diff --git a/backend/src/Api/Endpoints/StudyBuddyEndpoints.cs b/backend/src/Api/Endpoints/StudyBuddyEndpoints.cs new file mode 100644 index 00000000..36ff76f6 --- /dev/null +++ b/backend/src/Api/Endpoints/StudyBuddyEndpoints.cs @@ -0,0 +1,181 @@ +using System.Net.ServerSentEvents; +using System.Runtime.CompilerServices; +using System.Text.Json; +using Api.Extensions; +using Api.Sites; +using Application.Agents; +using Application.Auth; +using Application.Common.Interfaces; +using Contracts.Agents; +using Microsoft.EntityFrameworkCore; +using TextStack.Ai.Agents; +using TextStack.Ai.Core; + +namespace Api.Endpoints; + +/// <summary> +/// Study Buddy agent endpoints (Phase 6, AI-037b). <c>POST /me/books/{editionId}/studybuddy</c> runs +/// the agent on a highlighted passage and streams its steps over SSE (<c>step</c>* → <c>done</c> | +/// <c>error</c>), persisting the run (AI-036) when it finishes. <c>GET /me/studybuddy/runs/{id}</c> +/// returns a persisted run for the "show steps" UI. Both require an authenticated user; the POST is +/// rate-limited (agent runs are several LLM calls each). +/// </summary> +public static class StudyBuddyEndpoints +{ + private const string Agent = "studybuddy"; + private const int MaxPassageLength = 4000; + + public static void MapStudyBuddyEndpoints(this WebApplication app) + { + app.MapPost("/me/books/{editionId:guid}/studybuddy", Run) + .WithTags("Agents") + .RequireRateLimiting("studybuddy"); + + app.MapGet("/me/studybuddy/runs/{runId:guid}", GetRun) + .WithTags("Agents"); + } + + private static async Task Run( + Guid editionId, + StudyBuddyRequest request, + HttpContext httpContext, + AuthService authService, + IAppDbContext db, + StudyBuddyAgent agent, + IAgentRunWriter writer, + CancellationToken ct) + { + var userId = httpContext.GetUserId(authService); + if (userId is null) return Results.Unauthorized(); + + if (string.IsNullOrWhiteSpace(request.Passage)) + return Results.BadRequest(new { error = "Passage is required." 
}); + if (request.Passage.Length > MaxPassageLength) + return Results.BadRequest(new { error = $"Passage exceeds {MaxPassageLength} chars." }); + + var siteId = httpContext.GetSiteId(); + var editionExists = await db.Editions.AnyAsync(e => e.Id == editionId && e.SiteId == siteId, ct); + if (!editionExists) return Results.NotFound("Edition not found"); + + // Cloudflare/nginx must not buffer the stream (same Phase 5 risk as Explain SSE). + httpContext.Response.Headers["X-Accel-Buffering"] = "no"; + httpContext.Response.Headers.CacheControl = "no-cache"; + + var runId = Guid.NewGuid(); + var input = new StudyBuddyInput(request.Passage, editionId, request.ChapterNumber); + + return TypedResults.ServerSentEvents(StreamRunAsync( + agent, writer, runId, userId.Value, editionId, input, httpContext.RequestServices, + httpContext.RequestAborted)); + } + + /// <summary> + /// Streams the agent's steps as SSE and persists the run on completion. <c>step</c> per recorded + /// step, <c>done</c> with the final answer, or <c>error</c> when the agent fails / exhausts its + /// budget — the run is persisted with the right status either way (a budget-exhausted run keeps + /// its partial transcript, AI-036). A client disconnect rethrows the cancellation: nothing more is emitted or persisted. + /// </summary> + public static async IAsyncEnumerable> StreamRunAsync( + StudyBuddyAgent agent, + IAgentRunWriter writer, + Guid runId, + Guid userId, + Guid editionId, + StudyBuddyInput input, + IServiceProvider requestServices, + [EnumeratorCancellation] CancellationToken ct) + { + var ctx = new AgentContext(userId, editionId, runId, requestServices); + + AgentResult? result = null; + AgentBudgetExhaustedException? budgetExhausted = null; + Exception? 
failure = null; + + await using var e = agent.StreamAsync(input, ctx, ct).GetAsyncEnumerator(ct); + while (true) + { + AgentEvent ev; + try + { + if (!await e.MoveNextAsync()) + break; + ev = e.Current; + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + throw; // client disconnected — nothing to emit or persist + } + catch (AgentBudgetExhaustedException ex) + { + budgetExhausted = ex; + break; + } + catch (Exception ex) + { + failure = ex; + break; + } + + if (ev.Step is { } step) + yield return new SseItem(SerializeStep(step), "step"); + else if (ev.Result is { } r) + { + result = r; + yield return new SseItem(SerializeDone(runId, r), "done"); + } + } + + var record = result is not null + ? AgentRunRecordFactory.Completed(runId, Agent, userId, editionId, input.Passage, result) + : budgetExhausted is not null + ? AgentRunRecordFactory.BudgetExhausted(runId, Agent, userId, editionId, input.Passage, budgetExhausted) + : AgentRunRecordFactory.Failed(runId, Agent, userId, editionId, input.Passage, + failure ?? new InvalidOperationException("Agent stream ended without a result.")); + + // Best-effort persistence — a failed write must not break the response the client already got. NOTE(review): the write runs after the done event was already yielded and uses the request token, so a client that closes the stream on done may cancel or skip it — confirm that is acceptable. 
+ var persisted = true; + try + { + await writer.WriteAsync(record, ct); + } + catch + { + persisted = false; + } + + if (result is null) + yield return new SseItem(SerializeError(runId, record.Error, persisted), "error"); + } + + private static async Task GetRun( + Guid runId, HttpContext httpContext, AuthService authService, IAppDbContext db, CancellationToken ct) + { + var userId = httpContext.GetUserId(authService); + if (userId is null) return Results.Unauthorized(); + + var run = await db.AgentRuns + .Where(r => r.Id == runId && r.UserId == userId) + .FirstOrDefaultAsync(ct); + if (run is null) return Results.NotFound(); + + using var steps = JsonDocument.Parse(run.StepsJson); + return Results.Ok(new StudyBuddyRunDto( + run.Id, run.Agent, run.Status, run.Output, steps.RootElement.Clone(), + run.Iterations, run.CostUsd, run.LatencyMs, run.Error, run.CreatedAt)); + } + + private static string SerializeStep(AgentStep step) => + JsonSerializer.Serialize(new { index = step.Index, kind = step.Kind, payload = step.Payload, at = step.At }); + + private static string SerializeDone(Guid runId, AgentResult result) => + JsonSerializer.Serialize(new + { + runId, + answer = result.Output, + iterations = result.Usage.Iterations, + costUsd = result.Usage.CostUsdTotal, + }); + + private static string SerializeError(Guid runId, string? message, bool persisted) => + JsonSerializer.Serialize(new { runId, error = message ?? "Agent run failed", persisted }); +} diff --git a/backend/src/Api/Program.cs b/backend/src/Api/Program.cs index 5367b3aa..1f6ff86e 100644 --- a/backend/src/Api/Program.cs +++ b/backend/src/Api/Program.cs @@ -302,6 +302,17 @@ QueueLimit = 0, }); }); + // Study Buddy agent (AI-037): each run is several LLM calls, so a tighter per-IP limit. NOTE(review): the key is Connection.RemoteIpAddress — behind the Cloudflare tunnel confirm this is the client address, not the proxy's. 
+ options.AddPolicy("studybuddy", httpContext => + { + var ip = httpContext.Connection.RemoteIpAddress?.ToString() ?? 
"unknown"; + return RateLimitPartition.GetFixedWindowLimiter(ip, _ => new FixedWindowRateLimiterOptions + { + Window = TimeSpan.FromMinutes(1), + PermitLimit = 8, + QueueLimit = 0, + }); + }); options.RejectionStatusCode = StatusCodes.Status429TooManyRequests; // Emit Retry-After so clients can back off intelligently instead of // hammering in a tight retry loop. RateLimiter exposes the metadata @@ -493,6 +504,7 @@ app.MapAdminAiQualityEndpoints(); app.MapAdminRagEndpoints(); app.MapAskEndpoints(); +app.MapStudyBuddyEndpoints(); app.MapVocabularyEndpoints(); app.MapTtsEndpoints(); app.MapExportEndpoints(); diff --git a/backend/src/Contracts/Agents/StudyBuddyDtos.cs b/backend/src/Contracts/Agents/StudyBuddyDtos.cs new file mode 100644 index 00000000..3870281a --- /dev/null +++ b/backend/src/Contracts/Agents/StudyBuddyDtos.cs @@ -0,0 +1,22 @@ +using System.Text.Json; + +namespace Contracts.Agents; + +/// <summary>Request to run the Study Buddy agent on a highlighted passage (Phase 6, AI-037).</summary> +public record StudyBuddyRequest(string Passage, int? ChapterNumber); + +/// <summary> +/// A persisted Study Buddy run for the "show steps" view (<c>GET /me/studybuddy/runs/{id}</c>). +/// <c>Steps</c> is the transcript parsed from the stored jsonb. +/// </summary> +public record StudyBuddyRunDto( + Guid Id, + string Agent, + string Status, + string? Output, + JsonElement Steps, + int Iterations, + decimal CostUsd, + int LatencyMs, + string? Error, + DateTimeOffset CreatedAt); diff --git a/tests/TextStack.IntegrationTests/StudyBuddyEndpointTests.cs b/tests/TextStack.IntegrationTests/StudyBuddyEndpointTests.cs new file mode 100644 index 00000000..538779ba --- /dev/null +++ b/tests/TextStack.IntegrationTests/StudyBuddyEndpointTests.cs @@ -0,0 +1,69 @@ +using System.Net; +using System.Net.Http.Json; + +namespace TextStack.IntegrationTests; + +/// <summary> +/// Integration tests for the Study Buddy agent endpoints (AI-037), against the live API. 
The no-auth / +/// validation / not-found paths run without a key (they're rejected before any agent work); a real +/// streamed run needs a user session + OpenAI key + corpus, so it isn't asserted here. +/// </summary> +public class StudyBuddyEndpointTests : IClassFixture +{ + private static readonly Guid SomeEdition = Guid.Parse("11111111-2222-3333-4444-555555555555"); + private readonly AuthenticatedApiFixture _fixture; + + public StudyBuddyEndpointTests(AuthenticatedApiFixture fixture) => _fixture = fixture; + + [Fact] + public async Task Run_NoAuth_Unauthorized() + { + var request = new HttpRequestMessage(HttpMethod.Post, $"/me/books/{SomeEdition}/studybuddy") + { + Content = JsonContent.Create(new { passage = "A confusing passage." }), + }; + request.Headers.Host = LiveApiFixture.TestHost; + + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen(response.StatusCode is HttpStatusCode.NotFound, "endpoint not deployed"); + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task Run_AuthedEmptyPassage_BadRequest() + { + Assert.SkipUnless(_fixture.IsAuthenticated, "auth unavailable"); + + var request = _fixture.CreateRequest(HttpMethod.Post, $"/me/books/{SomeEdition}/studybuddy"); + request.Content = JsonContent.Create(new { passage = "" }); + + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen(response.StatusCode is HttpStatusCode.NotFound, "endpoint not deployed"); + Assert.Equal(HttpStatusCode.BadRequest, response.StatusCode); + } + + [Fact] + public async Task GetRun_NoAuth_Unauthorized() + { + var request = new HttpRequestMessage(HttpMethod.Get, $"/me/studybuddy/runs/{Guid.NewGuid()}"); + request.Headers.Host = LiveApiFixture.TestHost; + + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen(response.StatusCode is HttpStatusCode.NotFound, 
"endpoint not deployed"); + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task GetRun_AuthedUnknownRun_NotFound() + { + Assert.SkipUnless(_fixture.IsAuthenticated, "auth unavailable"); + + var request = _fixture.CreateRequest(HttpMethod.Get, $"/me/studybuddy/runs/{Guid.NewGuid()}"); + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.Equal(HttpStatusCode.NotFound, response.StatusCode); + } +} diff --git a/tests/TextStack.UnitTests/StudyBuddyEndpointTests.cs b/tests/TextStack.UnitTests/StudyBuddyEndpointTests.cs new file mode 100644 index 00000000..1faef3ff --- /dev/null +++ b/tests/TextStack.UnitTests/StudyBuddyEndpointTests.cs @@ -0,0 +1,101 @@ +using System.Net.ServerSentEvents; +using System.Runtime.CompilerServices; +using System.Text.Json; +using Api.Endpoints; +using Application.Agents; +using Microsoft.Extensions.DependencyInjection; +using TextStack.Ai.Agents; +using TextStack.Ai.Core; +using TextStack.Ai.Tools; + +namespace TextStack.UnitTests; + +/// <summary> +/// AI-037b — the Study Buddy SSE stream generator (<see cref="StudyBuddyEndpoints.StreamRunAsync"/>), +/// driven through the real agent + loop with a scripted LLM and a capturing writer: step events then +/// done + a persisted "completed" run; a budget-exhausted run streams partial steps then an error +/// event and persists "budget_exhausted" with its transcript. +/// </summary> +public class StudyBuddyEndpointTests +{ + private sealed class ScriptedLlm(params object[][] turns) : ILlmService + { + private int _turn; + + public Task CompleteAsync(LlmRequest request, CancellationToken ct) + { + var entries = _turn < turns.Length ? 
turns[_turn] : ["fallback"]; + _turn++; + var text = string.Concat(entries.OfType()); + var calls = entries.OfType().ToList(); + return Task.FromResult(new LlmResponse(text, calls, new LlmUsage(5, 2, 0.001m), "m", Guid.NewGuid())); + } + + public IAsyncEnumerable StreamAsync(LlmRequest request, CancellationToken ct) => + throw new NotSupportedException(); + } + + private sealed class CapturingWriter : IAgentRunWriter + { + public AgentRunRecord? Captured { get; private set; } + public Task WriteAsync(AgentRunRecord run, CancellationToken ct) + { + Captured = run; + return Task.CompletedTask; + } + } + + private static StudyBuddyAgent Agent(ILlmService llm) + { + var registry = new ToolRegistry([]); // no tools → SchemasFor returns empty + return new StudyBuddyAgent(new AgentLoop(llm, registry, new ToolDispatcher(registry))); + } + + private static ToolCall Call() => new("c1", "missing", JsonDocument.Parse("{}").RootElement); + + private static async Task>> Collect( + StudyBuddyAgent agent, IAgentRunWriter writer, Guid editionId) + { + var input = new StudyBuddyInput("A confusing passage.", editionId, ChapterNumber: 2); + var items = new List>(); + await foreach (var item in StudyBuddyEndpoints.StreamRunAsync( + agent, writer, Guid.NewGuid(), Guid.NewGuid(), editionId, input, + new ServiceCollection().BuildServiceProvider(), TestContext.Current.CancellationToken)) + { + items.Add(item); + } + return items; + } + + [Fact] + public async Task Stream_DirectAnswer_EmitsStepThenDone_PersistsCompleted() + { + var writer = new CapturingWriter(); + var events = await Collect(Agent(new ScriptedLlm(["The explanation."])), writer, Guid.NewGuid()); + + Assert.Equal(["step", "done"], events.Select(e => e.EventType)); + Assert.Contains("The explanation.", events[^1].Data); // done carries the answer + + Assert.NotNull(writer.Captured); + Assert.Equal("completed", writer.Captured!.Status); + Assert.Equal("The explanation.", writer.Captured.Output); + } + + [Fact] + public async 
Task Stream_BudgetExhausted_EmitsStepsThenError_PersistsWithTranscript() + { + // Always "calls" a (missing) tool → never answers → hits MaxSteps (6) → budget exhausted. + var turns = Enumerable.Range(0, 6).Select(_ => new object[] { Call() }).ToArray(); + var writer = new CapturingWriter(); + + var events = await Collect(Agent(new ScriptedLlm(turns)), writer, Guid.NewGuid()); + + Assert.Equal("error", events[^1].EventType); // terminal error event + Assert.Contains(events, e => e.EventType == "step"); // partial steps streamed first + Assert.DoesNotContain(events, e => e.EventType == "done"); + + Assert.NotNull(writer.Captured); + Assert.Equal("budget_exhausted", writer.Captured!.Status); + Assert.NotEmpty(writer.Captured.Steps); // transcript preserved (AI-036) + } +}