diff --git a/.editorconfig b/.editorconfig index 7efe752..55bba51 100644 --- a/.editorconfig +++ b/.editorconfig @@ -17,6 +17,7 @@ csharp_space_after_cast = false csharp_prefer_braces = when_multiline:warning csharp_style_expression_bodied_methods = false:suggestion csharp_style_expression_bodied_properties = true:suggestion +csharp_method_or_operator_body = expression_body:suggestion dotnet_style_qualification_for_field = false:warning dotnet_style_qualification_for_property = false:warning diff --git a/.gitignore b/.gitignore index 920616f..8596f40 100644 --- a/.gitignore +++ b/.gitignore @@ -436,3 +436,6 @@ FodyWeavers.xsd *.msix *.msm *.msp + +appsettings.Development.json +appsettings.Development* \ No newline at end of file diff --git a/docs/builder-api.md b/docs/builder-api.md index d2ab68a..b30e4a6 100644 --- a/docs/builder-api.md +++ b/docs/builder-api.md @@ -385,14 +385,21 @@ Builder API configuration lives under the `TextServices` key in `appsettings.jso ```json { + "RunMigrations": true, "ConnectionStrings": { "BuilderDb": "Host=localhost;Database=textservices;Username=...;Password=..." }, "TextServices": { "SearchApiBaseUrl": "https://search.example.org", + "AllowFileImageProxy": false, "MaxConcurrentPageFetches": 8, "Storage": { - "RootPath": "/data/textservices" + "FileSystem": { + "RootPath": "/data/textservices" + } + }, + "Notifications": { + "TopicArn": "" } }, "CorsAllowedOrigins": ["https://viewer.example.org"] @@ -401,13 +408,16 @@ Builder API configuration lives under the `TextServices` key in `appsettings.jso | Setting | Default | Description | |---|---|---| +| `RunMigrations` | `false` | When `true`, applies any pending EF Core database migrations on startup. Convenient for dev and simple deployments; leave `false` and run migrations separately in production. | | `ConnectionStrings:BuilderDb` | _(required)_ | PostgreSQL connection string. Used for both EF Core (job state) and Hangfire (job queue). | -| `SearchApiBaseUrl` | `""` | Public base URL of the Search API. Used to populate the `searchV1` / `searchV2` fields in completed job responses, and to construct `/proxy/image` URLs in synthesised Manifests when `sourceData` pages supply `file://` or `s3://` imageUri values. Leave empty if the Search API is not yet deployed. | -| `MaxConcurrentPageFetches` | `8` | Maximum number of text files fetched in parallel within a single job. Increase for internal or S3 sources; keep low (4–8) for third-party HTTP hosts. | -| `ReportBatchProgress` | `true` | When `true`, `PagesCompleted` is flushed to the database every 10 pages during processing, allowing `GET /textbuilder/{id}` to reflect live progress. Set to `false` to reduce database writes on large manifests; the final count is still persisted when the job completes. | -| `Storage:RootPath` | `textservices-data` | Root directory for stored text artefacts. Ignored when S3 storage is configured. Must be readable by the Search API if used. | -| `Storage:S3:BucketName` | `""` | S3 bucket for stored artefacts. When set, the S3 store is used instead of the filesystem store. | -| `Storage:S3:KeyPrefix` | `""` | Optional prefix for all S3 object keys (e.g. `"textservices/"`). A trailing `/` is added automatically if omitted. | | `CorsAllowedOrigins` | `[]` | Allowed CORS origins. Empty array disables CORS. | +| `TextServices:SearchApiBaseUrl` | `""` | Public base URL of the Search API. Used to populate the `searchV1` / `searchV2` fields in completed job responses, and to construct `/proxy/image` URLs in synthesised Manifests when `sourceData` pages supply `file://` or `s3://` imageUri values. Leave empty if the Search API is not yet deployed. | +| `TextServices:AllowFileImageProxy` | `false` | When `true`, synthesised Manifests embed `/proxy/image` proxy URLs for `file://` and `s3://` imageUri values instead of omitting the painting annotation. Only enable in trusted local-dev environments. The Search API's `AllowFileImageProxy` must also be `true` for those proxy URLs to serve real content. | +| `TextServices:MaxConcurrentPageFetches` | `8` | Maximum number of text files fetched in parallel within a single job. Increase for internal or S3 sources; keep low (4–8) for third-party HTTP hosts. | +| `TextServices:ReportBatchProgress` | `true` | When `true`, `PagesCompleted` is flushed to the database every 10 pages during processing, allowing `GET /textbuilder/{id}` to reflect live progress. Set to `false` to reduce database writes on large manifests; the final count is still persisted when the job completes. | +| `TextServices:Storage:FileSystem:RootPath` | `textservices-data` | Root directory for stored text artefacts. Ignored when S3 storage is configured. Must be readable by the Search API if used. | +| `TextServices:Storage:S3:BucketName` | `""` | S3 bucket for stored artefacts. When set, the S3 store is used instead of the filesystem store. | +| `TextServices:Storage:S3:KeyPrefix` | `""` | Optional prefix for all S3 object keys (e.g. `"textservices/"`). A trailing `/` is added automatically if omitted. | +| `TextServices:Notifications:TopicArn` | `""` | ARN of an SNS topic to publish a notification to when a job completes (success or failure). Leave empty to disable notifications. | S3 credentials and region are resolved by the standard AWS SDK credential chain (environment variables, instance profile, `appsettings.json` `AWS` section). Configure the region via the `AWS:Region` key or the `AWS_DEFAULT_REGION` environment variable. diff --git a/docs/search-api.md b/docs/search-api.md index cbbeb94..966318a 100644 --- a/docs/search-api.md +++ b/docs/search-api.md @@ -501,7 +501,11 @@ Search API configuration lives under the `TextServices` key in `appsettings.json "CacheSlidingExpirationMinutes": 30, "CacheAbsoluteExpirationHours": 4, "CacheMaxEntries": 20, - "StorageRootPath": "/data/textservices", + "Storage": { + "FileSystem": { + "RootPath": "/data/textservices" + } + }, "PdfTriggerQueueCapacity": 50, "PdfTriggerMaxConcurrency": 2 } @@ -510,15 +514,17 @@ Search API configuration lives under the `TextServices` key in `appsettings.json | Setting | Default | Description | |---|---|---| -| `BaseUrl` | `""` | Public base URL of this API. Required when running behind a reverse proxy; without it, self-referencing URLs in responses will use the incoming `Host` header, which may be internal. | -| `CacheSlidingExpirationMinutes` | `30` | How long a text object stays in the memory cache after its last access. | -| `CacheAbsoluteExpirationHours` | `4` | Hard upper limit on cache lifetime, regardless of access frequency. Prevents large objects from living in the LOH indefinitely. | -| `CacheMaxEntries` | `20` | Maximum number of Text (and AutoComplete) objects held in memory simultaneously. Each object counts as one slot; LRU eviction applies when the limit is reached. Budget approximately 30–40 MB per large text when sizing container memory. | -| `StorageRootPath` | `textservices-data` | Root directory of the text artefact store. Must point to the same location as the Builder API's `Storage:RootPath`. | -| `PdfTriggerQueueCapacity` | `50` | Maximum number of PDF trigger requests that can be queued for background generation. Requests beyond this limit receive `503 Service Unavailable`. | -| `PdfTriggerMaxConcurrency` | `2` | Maximum number of PDFs generated concurrently by the background trigger queue. Each in-flight generation buffers the full PDF in memory — keep this low on memory-constrained hosts. | -| `AllowFileImageProxy` | `false` | When `true`, the `/proxy/image` endpoint streams local `file://` images. Only enable in trusted local-dev environments where those files are not access-controlled. | -| `AllowedCustomHosts` | `[]` | Hostnames accepted from the `X-Forwarded-Host` request header (e.g. custom CloudFront distributions). See [Forwarded-header URL rewriting](#forwarded-header-url-rewriting) below. | +| `TextServices:BaseUrl` | `""` | Public base URL of this API. Required when running behind a reverse proxy; without it, self-referencing URLs in responses will use the incoming `Host` header, which may be internal. | +| `TextServices:CacheSlidingExpirationMinutes` | `30` | How long a text object stays in the memory cache after its last access. | +| `TextServices:CacheAbsoluteExpirationHours` | `4` | Hard upper limit on cache lifetime, regardless of access frequency. Prevents large objects from living in the LOH indefinitely. | +| `TextServices:CacheMaxEntries` | `20` | Maximum number of Text (and AutoComplete) objects held in memory simultaneously. Each object counts as one slot; LRU eviction applies when the limit is reached. Budget approximately 30–40 MB per large text when sizing container memory. | +| `TextServices:Storage:FileSystem:RootPath` | `textservices-data` | Root directory of the text artefact store. Must match the Builder API's `TextServices:Storage:FileSystem:RootPath`. Ignored when S3 storage is configured. | +| `TextServices:Storage:S3:BucketName` | `""` | S3 bucket for stored artefacts. When set, the S3 store is used instead of the filesystem store. Must match the Builder API's `TextServices:Storage:S3:BucketName`. | +| `TextServices:Storage:S3:KeyPrefix` | `""` | Optional prefix for all S3 object keys (e.g. `"textservices/"`). A trailing `/` is added automatically if omitted. Must match the Builder API's `TextServices:Storage:S3:KeyPrefix`. | +| `TextServices:PdfTriggerQueueCapacity` | `50` | Maximum number of PDF trigger requests that can be queued for background generation. Requests beyond this limit receive `503 Service Unavailable`. | +| `TextServices:PdfTriggerMaxConcurrency` | `2` | Maximum number of PDFs generated concurrently by the background trigger queue. Each in-flight generation buffers the full PDF in memory — keep this low on memory-constrained hosts. | +| `TextServices:AllowFileImageProxy` | `false` | When `true`, the `/proxy/image` endpoint streams local `file://` images. Only enable in trusted local-dev environments where those files are not access-controlled. | +| `TextServices:AllowedCustomHosts` | `[]` | Hostnames accepted from the `X-Forwarded-Host` request header (e.g. custom CloudFront distributions). See [Forwarded-header URL rewriting](#forwarded-header-url-rewriting) below. | --- diff --git a/src/TextServices.Builder.Api/Configuration/ServiceCollectionExtensions.cs b/src/TextServices.Builder.Api/Configuration/ServiceCollectionExtensions.cs index c936206..3b49cf1 100644 --- a/src/TextServices.Builder.Api/Configuration/ServiceCollectionExtensions.cs +++ b/src/TextServices.Builder.Api/Configuration/ServiceCollectionExtensions.cs @@ -5,8 +5,11 @@ using Hangfire; using Hangfire.PostgreSql; using Microsoft.Extensions.Options; +using Serilog; +using Serilog.Extensions.Logging; using TextServices.Builder.Api.Services; using TextServices.Builder.Api.Services.Notifications; +using TextServices.Infrastructure.Http; using TextServices.Storage; namespace TextServices.Builder.Api.Configuration; @@ -53,7 +56,7 @@ public static IServiceCollection AddFetchingServices(this IServiceCollection ser { services.AddHttpClient("Resource", client => { - client.DefaultRequestHeaders.UserAgent.ParseAdd("TextServices/1.0"); + client.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgents.Builder); client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/ld+json", 0.9)); client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("*/*", 0.8)); @@ -81,7 +84,9 @@ public static IServiceCollection AddTextStorage(this IServiceCollection services if (!string.IsNullOrEmpty(configuration["TextServices:Storage:S3:BucketName"])) return ActivatorUtilities.CreateInstance(sp); var storage = sp.GetRequiredService>().Value.Storage; - return new FileSystemTextStore(new FileSystemTextStoreOptions { RootPath = storage.RootPath }); + return ActivatorUtilities.CreateInstance( + sp, + new FileSystemTextStoreOptions { RootPath = storage.FileSystem.RootPath }); }); return services; diff --git a/src/TextServices.Builder.Api/Configuration/TextServicesOptions.cs b/src/TextServices.Builder.Api/Configuration/TextServicesOptions.cs index 1b08e15..a426d98 100644 --- a/src/TextServices.Builder.Api/Configuration/TextServicesOptions.cs +++ b/src/TextServices.Builder.Api/Configuration/TextServicesOptions.cs @@ -9,7 +9,6 @@ public class TextServicesOptions /// public string SearchApiBaseUrl { get; set; } = string.Empty; - /// /// Allow the Search API's /proxy/image endpoint to serve file:// image URIs /// supplied in sourceData pages. @@ -26,18 +25,10 @@ public class TextServicesOptions /// referenced by imageUri are not access-controlled. /// /// - public bool AllowFileImageProxy { get; set; } = false; + public bool AllowFileImageProxy { get; set; } /// /// Maximum number of text files (ALTO, VTT, AnnotationPage) fetched concurrently within a single job. - /// The right value depends on the source: - /// - /// Third-party HTTP (Wellcome, Internet Archive, etc.): 4–8 for politeness. - /// Internal/trusted HTTP: 16–32. - /// S3 (same-region, same-bucket): 64–128 — S3 handles high parallelism well. - /// - /// TODO: When S3 storage is added, consider deriving the limit automatically from - /// the URI scheme/host of the text links, or adding a per-host override table here. /// public int MaxConcurrentPageFetches { get; set; } = 8; @@ -57,11 +48,29 @@ public class TextServicesOptions } public class StorageOptions +{ + /// Options for the filesystem text store. + public FileSystemStorageOptions FileSystem { get; set; } = new(); + + /// Options for S3 storage. When is set, S3 is used instead of the filesystem. + public S3StorageOptions S3 { get; set; } = new(); +} + +public class FileSystemStorageOptions { /// Root path under which text artefacts are stored on the filesystem. public string RootPath { get; set; } = "textservices-data"; } +public class S3StorageOptions +{ + /// S3 bucket for stored artefacts. + public string BucketName { get; set; } = string.Empty; + + /// Optional prefix for all S3 object keys (e.g. "textservices/"). A trailing / is added automatically if omitted. + public string KeyPrefix { get; set; } = string.Empty; +} + public class NotificationsOptions { /// diff --git a/src/TextServices.Builder.Api/Features/Jobs/DeleteJob.cs b/src/TextServices.Builder.Api/Features/Jobs/DeleteJob.cs index ee5d7c0..683b9df 100644 --- a/src/TextServices.Builder.Api/Features/Jobs/DeleteJob.cs +++ b/src/TextServices.Builder.Api/Features/Jobs/DeleteJob.cs @@ -23,8 +23,7 @@ public async Task Handle(DeleteJobRequest request, CancellationToken ct) var job = await db.Jobs.FindAsync([request.Id], ct); if (job == null) return false; - if (job.HangfireJobId != null) - hangfire.Delete(job.HangfireJobId); + if (job.HangfireJobId != null) hangfire.Delete(job.HangfireJobId); await textStore.DeleteArtefacts(job.Id); diff --git a/src/TextServices.Builder.Api/Features/Jobs/JobEndpoints.cs b/src/TextServices.Builder.Api/Features/Jobs/JobEndpoints.cs index a1eacd5..0fe19da 100644 --- a/src/TextServices.Builder.Api/Features/Jobs/JobEndpoints.cs +++ b/src/TextServices.Builder.Api/Features/Jobs/JobEndpoints.cs @@ -35,13 +35,13 @@ internal static IEndpointRouteBuilder MapJobEndpoints(this IEndpointRouteBuilder return Results.Ok(result); }); - routes.MapGet("/textbuilder/{**id}", async (string id, ISender sender) => + routes.MapGet("/textbuilder/{*id:minlength(1)}", async (string id, ISender sender) => { var response = await sender.Send(new GetJobRequest(id)); return response == null ? Results.NotFound() : Results.Ok(response); }); - routes.MapPut("/textbuilder/{**id}", async (string id, ISender sender) => + routes.MapPut("/textbuilder/{*id:minlength(1)}", async (string id, ISender sender) => { var result = await sender.Send(new ReprocessJobRequest(id)); return result.Status switch @@ -52,7 +52,7 @@ internal static IEndpointRouteBuilder MapJobEndpoints(this IEndpointRouteBuilder }; }); - routes.MapDelete("/textbuilder/{**id}", async (string id, ISender sender) => + routes.MapDelete("/textbuilder/{*id:minlength(1)}", async (string id, ISender sender) => { var found = await sender.Send(new DeleteJobRequest(id)); return found ? Results.NoContent() : Results.NotFound(); diff --git a/src/TextServices.Builder.Api/Features/Jobs/JobInstruction.cs b/src/TextServices.Builder.Api/Features/Jobs/JobInstruction.cs index 2497e5c..8768bb5 100644 --- a/src/TextServices.Builder.Api/Features/Jobs/JobInstruction.cs +++ b/src/TextServices.Builder.Api/Features/Jobs/JobInstruction.cs @@ -46,6 +46,13 @@ public class JobInstruction : IValidatableObject public IEnumerable Validate(ValidationContext validationContext) { + if (Id.StartsWith('/')) + { + yield return new ValidationResult( + "Job ID must not start with '/'.", + [nameof(Id)]); + } + var hasUri = !string.IsNullOrWhiteSpace(SourceUri); var hasData = SourceData is { Count: > 0 }; @@ -63,9 +70,9 @@ public IEnumerable Validate(ValidationContext validationContex [nameof(SourceUri), nameof(SourceData)]); } - if (SourceData != null) + if (hasData) { - foreach (var page in SourceData) + foreach (var page in SourceData!) { if (!string.Equals(page.Type, "pdf", StringComparison.OrdinalIgnoreCase) && string.IsNullOrEmpty(page.Id)) diff --git a/src/TextServices.Builder.Api/Features/Jobs/JobResponse.cs b/src/TextServices.Builder.Api/Features/Jobs/JobResponse.cs index 9850bbb..de27682 100644 --- a/src/TextServices.Builder.Api/Features/Jobs/JobResponse.cs +++ b/src/TextServices.Builder.Api/Features/Jobs/JobResponse.cs @@ -1,5 +1,6 @@ using TextServices.Builder.Api.Configuration; using TextServices.Builder.Api.Data; +using TextServices.Infrastructure; using TextServices.Storage; namespace TextServices.Builder.Api.Features.Jobs; @@ -76,30 +77,30 @@ public static JobResponse From(BuilderJob job, TextServicesOptions options) if (fulfilled.HasFlag(JobServices.Search)) { - searchV1 = $"{baseUrl}/search/v1/{job.Id}"; - searchV2 = $"{baseUrl}/search/v2/{job.Id}"; + searchV1 = SearchApiRoutes.SearchV1(baseUrl, job.Id); + searchV2 = SearchApiRoutes.SearchV2(baseUrl, job.Id); } if (fulfilled.HasFlag(JobServices.Autocomplete)) { - autocompleteV1 = $"{baseUrl}/autocomplete/v1/{job.Id}"; - autocompleteV2 = $"{baseUrl}/autocomplete/v2/{job.Id}"; + autocompleteV1 = SearchApiRoutes.AutocompleteV1(baseUrl, job.Id); + autocompleteV2 = SearchApiRoutes.AutocompleteV2(baseUrl, job.Id); } if (fulfilled.HasFlag(JobServices.FullText)) - fullText = $"{baseUrl}/text/v1/{job.Id}"; + fullText = SearchApiRoutes.FullText(baseUrl, job.Id); if (fulfilled.HasFlag(JobServices.Pdf)) - pdf = $"{baseUrl}/pdf/v1/{job.Id}"; + pdf = SearchApiRoutes.Pdf(baseUrl, job.Id); if (fulfilled.HasFlag(JobServices.TextAugmented)) - textAugmented = $"{baseUrl}/text-augmented/v3/{job.Id}"; + textAugmented = SearchApiRoutes.TextAugmented(baseUrl, job.Id); if (fulfilled.HasFlag(JobServices.Annotations)) - annotations = $"{baseUrl}/annotations/manifest/v1/{job.Id}"; + annotations = SearchApiRoutes.AnnotationsManifest(baseUrl, job.Id); if (fulfilled.HasFlag(JobServices.Figures)) - figures = $"{baseUrl}/identified/figures/{job.Id}"; + figures = SearchApiRoutes.Figures(baseUrl, job.Id); } return new JobResponse diff --git a/src/TextServices.Builder.Api/Jobs/LogContextHelpers.cs b/src/TextServices.Builder.Api/Jobs/LogContextHelpers.cs new file mode 100644 index 0000000..a48714f --- /dev/null +++ b/src/TextServices.Builder.Api/Jobs/LogContextHelpers.cs @@ -0,0 +1,14 @@ +using Serilog.Context; +using Serilog.Core.Enrichers; + +namespace TextServices.Builder.Api.Jobs; + +internal static class LogContextHelpers +{ + /// + /// "CorrelationId" properties to log context, which is then output as part of default log template. + /// This is useful for filtering logs. Consists of {jobId}:{random-guid} + /// + public static IDisposable SetCorrelationId(string jobId) => + LogContext.Push(new PropertyEnricher("CorrelationId", $"{jobId}:{Guid.NewGuid()}")); +} diff --git a/src/TextServices.Builder.Api/Jobs/TextBuildJob.cs b/src/TextServices.Builder.Api/Jobs/TextBuildJob.cs index fb2a759..95b4c2d 100644 --- a/src/TextServices.Builder.Api/Jobs/TextBuildJob.cs +++ b/src/TextServices.Builder.Api/Jobs/TextBuildJob.cs @@ -35,6 +35,7 @@ public class TextBuildJob( ITextStore textStore, IJobNotifier jobNotifier, IOptions options, + ILoggerFactory loggerFactory, ILogger logger) { private const int ProgressBatchSize = 10; @@ -45,57 +46,60 @@ private record FetchedPage(PageInstruction Page, XElement? Xml, string? StringCo [JobDisplayName("TextBuild: {0}")] public async Task ExecuteAsync(string jobId, IJobCancellationToken cancellationToken) { - var job = await db.Jobs.FindAsync(jobId); - if (job == null) + using (LogContextHelpers.SetCorrelationId(jobId)) { - logger.LogWarning("TextBuildJob invoked for unknown job {JobId}", jobId); - return; - } - - job.Status = JobStatus.Running; - job.Started = DateTimeOffset.UtcNow; - await db.SaveChangesAsync(); + var job = await db.Jobs.FindAsync(jobId); + if (job == null) + { + logger.LogWarning("TextBuildJob invoked for unknown job {JobId}", jobId); + return; + } - try - { - var pages = await GetPages(job, cancellationToken.ShutdownToken); - job.TotalPages = pages.Count; + job.Status = JobStatus.Running; + job.Started = DateTimeOffset.UtcNow; await db.SaveChangesAsync(); - var (wordCount, imageCount, errors, fulfilled) = - await ProcessPages(job, pages, cancellationToken); + try + { + var pages = await GetPages(job, cancellationToken.ShutdownToken); + job.TotalPages = pages.Count; + await db.SaveChangesAsync(); - job.TotalWordCount = wordCount; - job.TotalImageCount = imageCount; - job.Errors = errors.Count > 0 ? string.Join('\n', errors) : null; - job.FulfilledServices = (int)fulfilled; - job.Status = JobStatus.Completed; - job.Finished = DateTimeOffset.UtcNow; - await db.SaveChangesAsync(); + var (wordCount, imageCount, errors, fulfilled) = + await ProcessPages(job, pages, cancellationToken); - logger.LogInformation( - "TextBuildJob completed for {JobId}: {WordCount} words, {ImageCount} images, " + - "{ErrorCount} page error(s)", - jobId, wordCount, imageCount, errors.Count); + job.TotalWordCount = wordCount; + job.TotalImageCount = imageCount; + job.Errors = errors.Count > 0 ? string.Join('\n', errors) : null; + job.FulfilledServices = (int)fulfilled; + job.Status = JobStatus.Completed; + job.Finished = DateTimeOffset.UtcNow; + await db.SaveChangesAsync(); - await jobNotifier.Notify( - new JobCompletionNotification(job.Id, job.Status, job.Finished, - job.TotalPages, job.TotalWordCount, job.Errors), - cancellationToken.ShutdownToken); - } - catch (Exception ex) - { - logger.LogError(ex, "TextBuildJob failed for {JobId}", jobId); - job.Status = JobStatus.Failed; - job.Errors = ex.Message; - job.FulfilledServices = (int)JobServices.None; - job.Finished = DateTimeOffset.UtcNow; - await db.SaveChangesAsync(); + logger.LogInformation( + "TextBuildJob completed for {JobId}: {WordCount} words, {ImageCount} images, " + + "{ErrorCount} page error(s)", + jobId, wordCount, imageCount, errors.Count); - await jobNotifier.Notify( - new JobCompletionNotification(job.Id, job.Status, job.Finished, - job.TotalPages, job.TotalWordCount, job.Errors), - cancellationToken.ShutdownToken); + await jobNotifier.Notify( + new JobCompletionNotification(job.Id, job.Status, job.Finished, + job.TotalPages, job.TotalWordCount, job.Errors), + cancellationToken.ShutdownToken); + } + catch (Exception ex) + { + logger.LogError(ex, "TextBuildJob failed for {JobId}", jobId); + job.Status = JobStatus.Failed; + job.Errors = ex.Message; + job.FulfilledServices = (int)JobServices.None; + job.Finished = DateTimeOffset.UtcNow; + await db.SaveChangesAsync(); + + await jobNotifier.Notify( + new JobCompletionNotification(job.Id, job.Status, job.Finished, + job.TotalPages, job.TotalWordCount, job.Errors), + cancellationToken.ShutdownToken); + } } } @@ -143,18 +147,6 @@ private async Task> GetPages(BuilderJob job, Canc // Fetch all pages concurrently, bounded by MaxConcurrentPageFetches. // Results are stored into a pre-allocated array so TextBuilder receives // canvases in the correct sequence. - // - // TODO: The right concurrency limit depends on where the text files live. - // - Third-party HTTP (e.g. Wellcome, Internet Archive): keep low (4–8) for - // politeness and to avoid rate-limiting. - // - Internal/trusted HTTP: can be higher (16–32). - // - S3 (s3:// or https://*.s3.amazonaws.com): S3 supports very high - // parallelism on the same bucket; 64–128 is reasonable. When an S3 - // ITextStore is in use the ALTO URIs will typically be pre-signed HTTPS - // URLs or s3:// keys fetched via the AWS SDK — detect by scheme or - // hostname pattern and use a higher limit for those. - // Consider deriving the limit from the scheme/host of pages[0].Text, or - // adding a per-host override table to TextServicesOptions. // pdf-type pages embed an existing PDF; they have no text to build. // Custom-type pages are generated at PDF render time; no text to fetch. @@ -170,7 +162,7 @@ await Parallel.ForEachAsync(Enumerable.Range(0, pagesToFetch.Count), async (i, ct) => { fetched[i] = await FetchPageAsync(pagesToFetch[i], ct); }); // Build text in original canvas order (TextBuilder requires sequential input). - var textBuilder = new TextBuilder(); + var textBuilder = new TextBuilder(loggerFactory); var errors = fetched.Where(r => r.Error != null).Select(r => r.Error!).ToList(); int completed = 0; @@ -499,7 +491,7 @@ private async Task FetchPageAsync(PageInstruction page, Cancellatio } catch (Exception ex) { - logger.LogWarning(ex, "Failed to fetch page {CanvasId} from {Url}", page.Id, page.TextUri); + logger.LogWarning(ex, "Failed to fetch {Url} for canvas {CanvasId}", page.TextUri, page.Id); return new FetchedPage(page, null, null, $"{page.Id}: {ex.Message}"); } } diff --git a/src/TextServices.Builder.Api/Services/AltoFetcher.cs b/src/TextServices.Builder.Api/Services/AltoFetcher.cs index 125f57b..2e13891 100644 --- a/src/TextServices.Builder.Api/Services/AltoFetcher.cs +++ b/src/TextServices.Builder.Api/Services/AltoFetcher.cs @@ -2,10 +2,11 @@ namespace TextServices.Builder.Api.Services; -public sealed class AltoFetcher(IResourceFetcher fetcher) : IAltoFetcher +public sealed class AltoFetcher(IResourceFetcher fetcher, ILogger logger) : IAltoFetcher { - public async Task FetchAsync(string uri, CancellationToken ct = default) + public async Task FetchAsync(string uri, CancellationToken ct) { + logger.LogDebug("Fetching alto: {Uri}", uri); await using var stream = await fetcher.FetchAsync(uri, ct); if (stream is null) return null; diff --git a/src/TextServices.Builder.Api/Services/AnnotationPageFetcher.cs b/src/TextServices.Builder.Api/Services/AnnotationPageFetcher.cs index 8983ba1..0a833bd 100644 --- a/src/TextServices.Builder.Api/Services/AnnotationPageFetcher.cs +++ b/src/TextServices.Builder.Api/Services/AnnotationPageFetcher.cs @@ -1,12 +1,10 @@ -using Microsoft.Extensions.Logging; - namespace TextServices.Builder.Api.Services; public sealed class AnnotationPageFetcher( IResourceFetcher fetcher, ILogger logger) : IAnnotationPageFetcher { - public async Task FetchAsync(string uri, CancellationToken ct = default) + public async Task FetchAsync(string uri, CancellationToken ct) { logger.LogDebug("Fetching annotation page: {Uri}", uri); await using var stream = await fetcher.FetchAsync(uri, ct); diff --git a/src/TextServices.Builder.Api/Services/IAltoFetcher.cs b/src/TextServices.Builder.Api/Services/IAltoFetcher.cs index fc40784..910c693 100644 --- a/src/TextServices.Builder.Api/Services/IAltoFetcher.cs +++ b/src/TextServices.Builder.Api/Services/IAltoFetcher.cs @@ -13,5 +13,5 @@ public interface IAltoFetcher /// or the response body is empty — both treated as a sparse page. /// Throws for other HTTP errors or parse failures. /// - Task FetchAsync(string uri, CancellationToken ct = default); + Task FetchAsync(string uri, CancellationToken ct); } diff --git a/src/TextServices.Builder.Api/Services/IAnnotationPageFetcher.cs b/src/TextServices.Builder.Api/Services/IAnnotationPageFetcher.cs index a4c5e79..cbd8beb 100644 --- a/src/TextServices.Builder.Api/Services/IAnnotationPageFetcher.cs +++ b/src/TextServices.Builder.Api/Services/IAnnotationPageFetcher.cs @@ -10,5 +10,5 @@ public interface IAnnotationPageFetcher /// or if the resource returns HTTP 404. /// Throws for other failure status codes. /// - Task FetchAsync(string uri, CancellationToken ct = default); + Task FetchAsync(string uri, CancellationToken ct); } diff --git a/src/TextServices.Builder.Api/Services/IManifestFetcher.cs b/src/TextServices.Builder.Api/Services/IManifestFetcher.cs index fb5f1a0..ea03579 100644 --- a/src/TextServices.Builder.Api/Services/IManifestFetcher.cs +++ b/src/TextServices.Builder.Api/Services/IManifestFetcher.cs @@ -12,7 +12,7 @@ public interface IManifestFetcher /// Downloads the manifest at , stores the raw JSON, and /// returns both the JSON string and the reduced page sequence. /// - Task FetchAndReduce(string uri, CancellationToken ct = default); + Task FetchAndReduce(string uri, CancellationToken ct); } /// The raw manifest JSON as returned by the server. diff --git a/src/TextServices.Builder.Api/Services/IResourceFetcher.cs b/src/TextServices.Builder.Api/Services/IResourceFetcher.cs index 0b7646c..bdca91a 100644 --- a/src/TextServices.Builder.Api/Services/IResourceFetcher.cs +++ b/src/TextServices.Builder.Api/Services/IResourceFetcher.cs @@ -15,5 +15,5 @@ public interface IResourceFetcher /// /// s3:// URI was given but S3 is not configured. /// - Task FetchAsync(string uri, CancellationToken ct = default); + Task FetchAsync(string uri, CancellationToken ct); } diff --git a/src/TextServices.Builder.Api/Services/IVttFetcher.cs b/src/TextServices.Builder.Api/Services/IVttFetcher.cs index e6296b8..6058045 100644 --- a/src/TextServices.Builder.Api/Services/IVttFetcher.cs +++ b/src/TextServices.Builder.Api/Services/IVttFetcher.cs @@ -10,5 +10,5 @@ public interface IVttFetcher /// Returns for HTTP 404 (sparse page). /// Throws for other failure conditions. /// - Task FetchAsync(string uri, CancellationToken ct = default); + Task FetchAsync(string uri, CancellationToken ct); } diff --git a/src/TextServices.Builder.Api/Services/ManifestFetcher.cs b/src/TextServices.Builder.Api/Services/ManifestFetcher.cs index 5710bcb..8c36278 100644 --- a/src/TextServices.Builder.Api/Services/ManifestFetcher.cs +++ b/src/TextServices.Builder.Api/Services/ManifestFetcher.cs @@ -2,7 +2,7 @@ namespace TextServices.Builder.Api.Services; public sealed class ManifestFetcher(IResourceFetcher fetcher, IManifestReducer reducer) : IManifestFetcher { - public async Task FetchAndReduce(string uri, CancellationToken ct = default) + public async Task FetchAndReduce(string uri, CancellationToken ct) { await using var stream = await fetcher.FetchAsync(uri, ct) ?? throw new InvalidOperationException($"Manifest not found at: {uri}"); diff --git a/src/TextServices.Builder.Api/Services/Notifications/SnsJobNotifier.cs b/src/TextServices.Builder.Api/Services/Notifications/SnsJobNotifier.cs index 26e8aff..3391275 100644 --- a/src/TextServices.Builder.Api/Services/Notifications/SnsJobNotifier.cs +++ b/src/TextServices.Builder.Api/Services/Notifications/SnsJobNotifier.cs @@ -20,6 +20,7 @@ public async Task Notify(JobCompletionNotification notification, CancellationTok try { var messageType = notification.Status == JobStatus.Completed ? "JobCompleted" : "JobFailed"; + logger.LogDebug("Raising {MessageType} notification: {JobId}", messageType, notification.JobId); await sns.PublishAsync(new PublishRequest { diff --git a/src/TextServices.Builder.Api/Services/ResourceFetcher.cs b/src/TextServices.Builder.Api/Services/ResourceFetcher.cs index 0640481..43ea506 100644 --- a/src/TextServices.Builder.Api/Services/ResourceFetcher.cs +++ b/src/TextServices.Builder.Api/Services/ResourceFetcher.cs @@ -22,8 +22,7 @@ public sealed class ResourceFetcher(IHttpClientFactory httpClientFactory, IAmazo "file" => Task.FromResult(FetchFile(parsed)), "s3" => FetchS3Async(parsed, ct), "http" or "https" => FetchHttpAsync(uri, ct), - var scheme => throw new NotSupportedException( - $"Unsupported URI scheme '{scheme}': {uri}"), + var scheme => throw new NotSupportedException($"Unsupported URI scheme '{scheme}': {uri}"), }; } @@ -44,8 +43,7 @@ public sealed class ResourceFetcher(IHttpClientFactory httpClientFactory, IAmazo { if (s3 is null) { - throw new InvalidOperationException( - $"s3:// URI encountered but IAmazonS3 is not configured: {uri}"); + throw new InvalidOperationException($"s3:// URI encountered but IAmazonS3 is not configured: {uri}"); } var bucket = uri.Host; @@ -53,8 +51,7 @@ public sealed class ResourceFetcher(IHttpClientFactory httpClientFactory, IAmazo try { - var response = await s3.GetObjectAsync( - new GetObjectRequest { BucketName = bucket, Key = key }, ct); + var response = await s3.GetObjectAsync(new GetObjectRequest { BucketName = bucket, Key = key }, ct); return response.ResponseStream; } catch (AmazonS3Exception ex) when (ex.StatusCode == HttpStatusCode.NotFound) diff --git a/src/TextServices.Builder.Api/Services/VttFetcher.cs b/src/TextServices.Builder.Api/Services/VttFetcher.cs index 45cb0a5..d325c7d 100644 --- a/src/TextServices.Builder.Api/Services/VttFetcher.cs +++ b/src/TextServices.Builder.Api/Services/VttFetcher.cs @@ -1,10 +1,8 @@ -using Microsoft.Extensions.Logging; - namespace TextServices.Builder.Api.Services; public sealed class VttFetcher(IResourceFetcher fetcher, ILogger logger) : IVttFetcher { - public async Task FetchAsync(string uri, CancellationToken ct = default) + public async Task FetchAsync(string uri, CancellationToken ct) { logger.LogDebug("Fetching VTT: {Uri}", uri); await using var stream = await fetcher.FetchAsync(uri, ct); diff --git a/src/TextServices.Builder.Api/TextServices.Builder.Api.csproj b/src/TextServices.Builder.Api/TextServices.Builder.Api.csproj index bb7933c..2d4814a 100644 --- a/src/TextServices.Builder.Api/TextServices.Builder.Api.csproj +++ b/src/TextServices.Builder.Api/TextServices.Builder.Api.csproj @@ -10,12 +10,13 @@ + - + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/src/TextServices.Builder.Api/appsettings.Development-Example.json b/src/TextServices.Builder.Api/appsettings.Development-Example.json new file mode 100644 index 0000000..7a0bc49 --- /dev/null +++ b/src/TextServices.Builder.Api/appsettings.Development-Example.json @@ -0,0 +1,11 @@ +{ + "RunMigrations": true, + "ConnectionStrings": { + "BuilderDb": "Server=127.0.0.1;Port=5452;Database=postgres;User Id=postgres;Password=txt_password;IncludeErrorDetail=True" + }, + "CorsAllowedOrigins": [ "http://localhost:5100" ], + "TextServices": { + "SearchApiBaseUrl": "http://localhost:5294", + "AllowFileImageProxy": true + } +} diff --git a/src/TextServices.Builder.Api/appsettings.Test.json b/src/TextServices.Builder.Api/appsettings.Test.json new file mode 100644 index 0000000..956f4a1 --- /dev/null +++ b/src/TextServices.Builder.Api/appsettings.Test.json @@ -0,0 +1,12 @@ +{ + "TextServices": { + "Storage": { + "S3": { + "BucketName": "" + } + }, + "Notifications": { + "TopicArn": "" + } + } +} diff --git a/src/TextServices.Builder.Api/appsettings.json b/src/TextServices.Builder.Api/appsettings.json index 05b7fa6..270431d 100644 --- a/src/TextServices.Builder.Api/appsettings.json +++ b/src/TextServices.Builder.Api/appsettings.json @@ -19,15 +19,5 @@ ] }, "AllowedHosts": "*", - "RunMigrations": false, - "CorsAllowedOrigins": [ "http://localhost:5100" ], - "ConnectionStrings": { - "BuilderDb": "" - }, - "TextServices": { - "SearchApiBaseUrl": "", - "Storage": { - "RootPath": "C:/textservices-data" - } - } + "RunMigrations": false } diff --git a/src/TextServices.Core/Providers/AltoTextFormatProvider.cs b/src/TextServices.Core/Providers/AltoTextFormatProvider.cs index 37c907b..0eae22d 100644 --- a/src/TextServices.Core/Providers/AltoTextFormatProvider.cs +++ b/src/TextServices.Core/Providers/AltoTextFormatProvider.cs @@ -1,5 +1,7 @@ using System.Runtime.CompilerServices; using System.Xml.Linq; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Core.Models; namespace TextServices.Core.Providers; @@ -9,8 +11,10 @@ namespace TextServices.Core.Providers; /// Supports both ns-v2 (http://www.loc.gov/standards/alto/ns-v2#) and ns-v3 /// (http://www.loc.gov/standards/alto/ns-v3#) as well as namespace-free ALTO. /// -public class AltoTextFormatProvider : ITextFormatProvider +public class AltoTextFormatProvider(ILogger? logger = null) : ITextFormatProvider { + private readonly ILogger _logger = logger ?? NullLogger.Instance; + // Hyphen character used in some ALTO sources as a soft-hyphen marker private const char HyphenSpecial = '¬'; @@ -40,6 +44,8 @@ public void ProcessPage( int canvasHeight) { var ns = DetectAltoNamespace(root); + _logger.LogDebug("Processing ALTO page '{Id}' with {Namespace} namespace", + imageIdentifier, ns == XNamespace.None ? "none" : ns.NamespaceName); var pageElement = FindDescendant(root, ns, "Page"); if (pageElement == null) return; @@ -51,7 +57,11 @@ public void ProcessPage( float scaleH = altoHeight > 0 ? canvasHeight / altoHeight : 1f; var printSpace = FindDescendant(pageElement, ns, "PrintSpace"); - if (printSpace == null) return; + if (printSpace == null) + { + _logger.LogWarning("No PrintSpace found for ALTO page '{Id}'; page will be empty", imageIdentifier); + return; + } accumulator.BeginPage(imageIdentifier); @@ -94,6 +104,7 @@ public void ProcessPage( accumulator.LastWordNormPosition, composedBlockTracker); } + _logger.LogDebug("Merged hyphenated word on page '{Id}'", imageIdentifier); hyphenPending = false; startIdx = 1; // HypPart2 string consumed — skip it below } @@ -143,6 +154,7 @@ public void ProcessPage( // If the document ends with an unmatched HypPart1 (edge case), commit it as-is. if (hyphenPending && !string.IsNullOrEmpty(pendingNorm)) { + _logger.LogWarning("Unmatched HypPart1 at end of ALTO page '{Id}'", imageIdentifier); accumulator.AddWord(pendingRaw, pendingNorm, pendingX, pendingY, pendingW, pendingH, pendingSp); } diff --git a/src/TextServices.Core/Providers/HocrTextFormatProvider.cs b/src/TextServices.Core/Providers/HocrTextFormatProvider.cs index 6ead4ca..8776eb5 100644 --- a/src/TextServices.Core/Providers/HocrTextFormatProvider.cs +++ b/src/TextServices.Core/Providers/HocrTextFormatProvider.cs @@ -1,4 +1,6 @@ using System.Xml.Linq; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Core.Models; namespace TextServices.Core.Providers; @@ -14,8 +16,10 @@ namespace TextServices.Core.Providers; /// engine producing ocr_word spans. Well-formed XHTML input is required; /// is used by the fetcher before this provider receives the root. /// -public class HocrTextFormatProvider : ITextFormatProvider +public class HocrTextFormatProvider(ILogger? logger = null) : ITextFormatProvider { + private readonly ILogger _logger = logger ?? NullLogger.Instance; + /// /// /// Detects hOCR by looking for "hocr" in the seeAlso URI @@ -39,7 +43,12 @@ public void ProcessPage( int canvasHeight) { // Locate the ocr_page element — may be the root itself or a descendant. - var pageEl = FindByClass(root, "ocr_page").FirstOrDefault() ?? root; + var foundPage = FindByClass(root, "ocr_page").FirstOrDefault(); + if (foundPage == null) + { + _logger.LogDebug("No ocr_page element found for page '{Id}'; using root element", imageIdentifier); + } + var pageEl = foundPage ?? root; // Derive the coordinate scale from the page-level bbox when present. // hOCR bbox coords are x0 y0 x1 y1; for a page starting at 0,0 x1=pageW, y1=pageH. @@ -66,8 +75,13 @@ public void ProcessPage( foreach (var wordEl in wordEls) { - var bbox = ParseBbox(wordEl.Attribute("title")?.Value); - if (bbox is null) continue; + var wordTitle = wordEl.Attribute("title")?.Value; + var bbox = ParseBbox(wordTitle); + if (bbox is null) + { + if (wordTitle != null) _logger.LogDebug("Could not parse bbox for word on page '{Id}'; skipping", imageIdentifier); + continue; + } // Collect all text under this element, ignoring child markup. var rawWord = string.Concat( diff --git a/src/TextServices.Core/Providers/TextBuilder.cs b/src/TextServices.Core/Providers/TextBuilder.cs index 329a6ea..fa9cd15 100644 --- a/src/TextServices.Core/Providers/TextBuilder.cs +++ b/src/TextServices.Core/Providers/TextBuilder.cs @@ -1,4 +1,6 @@ using System.Xml.Linq; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Core.Models; namespace TextServices.Core.Providers; @@ -23,15 +25,25 @@ public class TextBuilder private readonly IReadOnlyList _providers; private readonly IReadOnlyList _transcriptProviders; private readonly TextAccumulator _accumulator = new(); + private readonly ILogger _logger; /// /// Initialises a with the default set of providers /// (ALTO ns-v2 / ns-v3, hOCR, and VTT). /// - public TextBuilder() : this( - [new AltoTextFormatProvider(), new HocrTextFormatProvider()], - [new VttTextFormatProvider(), new W3cAnnotationTextFormatProvider()]) - { } + public TextBuilder(ILoggerFactory? loggerFactory = null) + { + loggerFactory ??= NullLoggerFactory.Instance; + _logger = loggerFactory.CreateLogger(); + _providers = [ + new AltoTextFormatProvider(loggerFactory.CreateLogger()), + new HocrTextFormatProvider(loggerFactory.CreateLogger()), + ]; + _transcriptProviders = [ + new VttTextFormatProvider(loggerFactory.CreateLogger()), + new W3cAnnotationTextFormatProvider(loggerFactory.CreateLogger()), + ]; + } /// /// Initialises a with an explicit list of providers, @@ -40,6 +52,7 @@ public TextBuilder() : this( /// public TextBuilder(IReadOnlyList providers, IReadOnlyList? transcriptProviders = null) { + _logger = NullLogger.Instance; _providers = providers; _transcriptProviders = transcriptProviders ?? []; } @@ -66,7 +79,11 @@ public void AddPage( if (root == null) return; var provider = _providers.FirstOrDefault(p => p.Supports(profile, label)); - if (provider == null) return; + if (provider == null) + { + _logger.LogWarning("No provider found for page '{Id}' (profile='{Profile}', label='{Label}')", id, profile, label); + return; + } provider.ProcessPage(_accumulator, root, id, canvasWidth, canvasHeight); } @@ -85,7 +102,11 @@ public void AddTranscriptPage( { if (rawContent == null) return; var provider = _transcriptProviders.FirstOrDefault(p => p.Supports(profile, format, label)); - if (provider == null) return; + if (provider == null) + { + _logger.LogWarning("No provider found for transcript page '{Id}' (profile='{Profile}', format='{Format}', label='{Label}')", id, profile, format, label); + return; + } provider.ProcessPage(_accumulator, rawContent, id, canvasWidth, canvasHeight); } diff --git a/src/TextServices.Core/Providers/VttTextFormatProvider.cs b/src/TextServices.Core/Providers/VttTextFormatProvider.cs index 28462e5..a86b505 100644 --- a/src/TextServices.Core/Providers/VttTextFormatProvider.cs +++ b/src/TextServices.Core/Providers/VttTextFormatProvider.cs @@ -1,3 +1,5 @@ +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Core.Models; namespace TextServices.Core.Providers; @@ -6,8 +8,10 @@ namespace TextServices.Core.Providers; /// Processes WebVTT caption/subtitle files into the text index. /// Each VTT cue becomes a set of words sharing the same time range and line number. /// -public class VttTextFormatProvider : IStringFormatProvider +public class VttTextFormatProvider(ILogger? logger = null) : IStringFormatProvider { + private readonly ILogger _logger = logger ?? NullLogger.Instance; + public bool Supports(string? profile, string? format, string? label) => ContainsIgnoreCase(profile, "text/vtt") || ContainsIgnoreCase(format, "text/vtt") || ContainsIgnoreCase(profile, "vtt") || ContainsIgnoreCase(format, "vtt") || @@ -43,7 +47,15 @@ public void ProcessPage( // Timing line var timingLine = lines[i++]; - var (startMs, endMs) = ParseTimingLine(timingLine); + int startMs = 0, endMs = 0; + try + { + (startMs, endMs) = ParseTimingLine(timingLine); + } + catch (Exception) + { + _logger.LogWarning("Failed to parse VTT timing line on page '{Id}': {Value}", imageIdentifier, timingLine); + } // Collect cue text lines until blank line var cueLines = new List(); @@ -83,25 +95,21 @@ private static (int startMs, int endMs) ParseTimingLine(string line) private static int ParseTimestamp(string s) { // Accepts HH:MM:SS.mmm or MM:SS.mmm - try + var dotIdx = s.IndexOf('.'); + var ms = dotIdx >= 0 ? int.Parse(s[(dotIdx + 1)..].PadRight(3, '0')[..3]) : 0; + var timePart = dotIdx >= 0 ? s[..dotIdx] : s; + var colonParts = timePart.Split(':'); + return colonParts.Length switch { - var dotIdx = s.IndexOf('.'); - var ms = dotIdx >= 0 ? int.Parse(s[(dotIdx + 1)..].PadRight(3, '0')[..3]) : 0; - var timePart = dotIdx >= 0 ? s[..dotIdx] : s; - var colonParts = timePart.Split(':'); - return colonParts.Length switch - { - 3 => int.Parse(colonParts[0]) * 3_600_000 - + int.Parse(colonParts[1]) * 60_000 - + int.Parse(colonParts[2]) * 1_000 - + ms, - 2 => int.Parse(colonParts[0]) * 60_000 - + int.Parse(colonParts[1]) * 1_000 - + ms, - _ => 0, - }; - } - catch { return 0; } + 3 => int.Parse(colonParts[0]) * 3_600_000 + + int.Parse(colonParts[1]) * 60_000 + + int.Parse(colonParts[2]) * 1_000 + + ms, + 2 => int.Parse(colonParts[0]) * 60_000 + + int.Parse(colonParts[1]) * 1_000 + + ms, + _ => 0, + }; } /// Strips VTT inline markup tags (speaker tags, timestamp tags, class tags). diff --git a/src/TextServices.Core/Providers/W3cAnnotationTextFormatProvider.cs b/src/TextServices.Core/Providers/W3cAnnotationTextFormatProvider.cs index 613a3e9..6bf5a37 100644 --- a/src/TextServices.Core/Providers/W3cAnnotationTextFormatProvider.cs +++ b/src/TextServices.Core/Providers/W3cAnnotationTextFormatProvider.cs @@ -1,5 +1,7 @@ using System.Globalization; using System.Text.Json; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Core.Models; namespace TextServices.Core.Providers; @@ -15,8 +17,11 @@ namespace TextServices.Core.Providers; /// All words within one annotation share the same bounding box (or time range) /// and the same line number, so they coalesce correctly in search results. /// -public class W3cAnnotationTextFormatProvider : IStringFormatProvider +public class W3cAnnotationTextFormatProvider(ILogger? logger = null) : IStringFormatProvider { + private readonly ILogger _logger = + logger ?? NullLogger.Instance; + /// /// Internal sentinel value written into PageInstruction.Format by /// ManifestReducer when an externally-referenced AnnotationPage is @@ -45,7 +50,11 @@ public void ProcessPage( foreach (var anno in items.EnumerateArray()) { - if (!HasSupplementingMotivation(anno)) continue; + if (!HasSupplementingMotivation(anno)) + { + _logger.LogTrace("Skipped annotation without 'supplementing' motivation on page '{Id}'", imageIdentifier); + continue; + } if (!anno.TryGetProperty("body", out var bodyEl)) continue; var body = bodyEl.ValueKind == JsonValueKind.Array @@ -65,7 +74,11 @@ public void ProcessPage( if (!anno.TryGetProperty("target", out var targetEl)) continue; var target = ParseTarget(targetEl); - if (target == null) continue; + if (target == null) + { + _logger.LogDebug("Could not parse target for annotation on page '{Id}'; skipping", imageIdentifier); + continue; + } entries.Add((text, target.Value)); } @@ -180,7 +193,7 @@ public void ProcessPage( if (coords.StartsWith("pixel:", StringComparison.OrdinalIgnoreCase)) coords = coords[6..]; else if (coords.StartsWith("percent:", StringComparison.OrdinalIgnoreCase)) - return null; // percentage coordinates not supported + return null; // percentage coordinates not supported — logged by caller var parts = coords.Split(','); if (parts.Length != 4) return null; diff --git a/src/TextServices.Core/TextServices.Core.csproj b/src/TextServices.Core/TextServices.Core.csproj index b35d7ab..19b801a 100644 --- a/src/TextServices.Core/TextServices.Core.csproj +++ b/src/TextServices.Core/TextServices.Core.csproj @@ -7,6 +7,7 @@ + diff --git a/src/TextServices.Demo/wwwroot/js/builder.js b/src/TextServices.Demo/wwwroot/js/builder.js index 08386ae..ab7c8bc 100644 --- a/src/TextServices.Demo/wwwroot/js/builder.js +++ b/src/TextServices.Demo/wwwroot/js/builder.js @@ -96,7 +96,6 @@ async function refreshJobs() { let apiJobs = []; let apiReachable = true; try { - // TODO: add paging controls — pageSize=100 is a temporary workaround const res = await fetch(`${config.builderApi}/textbuilder?pageSize=100`); if (res.ok) { const data = await res.json(); diff --git a/src/TextServices.Infrastructure/Http/UserAgents.cs b/src/TextServices.Infrastructure/Http/UserAgents.cs new file mode 100644 index 0000000..2f54846 --- /dev/null +++ b/src/TextServices.Infrastructure/Http/UserAgents.cs @@ -0,0 +1,9 @@ +namespace TextServices.Infrastructure.Http; + +public static class UserAgents +{ + private const string Suffix = " (+https://github.com/dlcs/text-services)"; + + public const string Builder = "TextServices-Builder/1.0" + Suffix; + public const string Search = "TextServices-Search/1.0" + Suffix; +} diff --git a/src/TextServices.Infrastructure/SearchApiRoutes.cs b/src/TextServices.Infrastructure/SearchApiRoutes.cs new file mode 100644 index 0000000..486ccfb --- /dev/null +++ b/src/TextServices.Infrastructure/SearchApiRoutes.cs @@ -0,0 +1,25 @@ +namespace TextServices.Infrastructure; + +/// +/// Builds absolute URLs for all Search API endpoints. A single place to update if any +/// route segment changes. +/// +public static class SearchApiRoutes +{ + public static string SearchV1(string baseUrl, string id) => $"{baseUrl}/search/v1/{id}"; + public static string SearchV2(string baseUrl, string id) => $"{baseUrl}/search/v2/{id}"; + public static string AutocompleteV1(string baseUrl, string id) => $"{baseUrl}/autocomplete/v1/{id}"; + public static string AutocompleteV2(string baseUrl, string id) => $"{baseUrl}/autocomplete/v2/{id}"; + public static string FullText(string baseUrl, string id) => $"{baseUrl}/text/v1/{id}"; + public static string Pdf(string baseUrl, string id) => $"{baseUrl}/pdf/v1/{id}"; + public static string TextAugmented(string baseUrl, string id) => $"{baseUrl}/text-augmented/v3/{id}"; + public static string AnnotationsManifest(string baseUrl, string id) => $"{baseUrl}/annotations/manifest/v1/{id}"; + + public static string AnnotationsLines(string baseUrl, string id, int canvasIndex) => + $"{baseUrl}/annotations/lines/v1/{canvasIndex}/{id}"; + + public static string AnnotationsWords(string baseUrl, string id, int canvasIndex) => + $"{baseUrl}/annotations/words/v1/{canvasIndex}/{id}"; + + public static string Figures(string baseUrl, string id) => $"{baseUrl}/identified/figures/{id}"; +} diff --git a/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs b/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs index 1291721..e63f628 100644 --- a/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs +++ b/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs @@ -29,9 +29,6 @@ public class SearchApiOptions /// public int CacheMaxEntries { get; set; } = 20; - /// Root path of the filesystem text store (must match the Builder API's storage path). - public string StorageRootPath { get; set; } = "textservices-data"; - /// Maximum number of background PDF trigger requests to queue. Requests beyond this capacity return 503. public int PdfTriggerQueueCapacity { get; set; } = 50; @@ -65,4 +62,31 @@ public class SearchApiOptions /// X-Forwarded-Host is never honoured. /// public string[] AllowedCustomHosts { get; set; } = []; + + /// Options for the text artefact store. + public SearchStorageOptions Storage { get; set; } = new(); +} + +public class SearchStorageOptions +{ + /// Options for the filesystem text store. + public FileSystemStorageOptions FileSystem { get; set; } = new(); + + /// Options for S3 storage. When is set, S3 is used instead of the filesystem. + public S3StorageOptions S3 { get; set; } = new(); +} + +public class FileSystemStorageOptions +{ + /// Root directory for stored text artefacts. + public string RootPath { get; set; } = "textservices-data"; +} + +public class S3StorageOptions +{ + /// S3 bucket for stored artefacts. When set, the S3 store is used instead of the filesystem store. + public string BucketName { get; set; } = string.Empty; + + /// Optional prefix for all S3 object keys (e.g. "textservices/"). A trailing / is added automatically if omitted. + public string KeyPrefix { get; set; } = string.Empty; } diff --git a/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs b/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs index da74253..6388993 100644 --- a/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs +++ b/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs @@ -1,6 +1,7 @@ using Microsoft.AspNetCore.HttpOverrides; using Serilog; using Serilog.Extensions.Logging; +using TextServices.Infrastructure.Http; using TextServices.Pdf; using TextServices.Search.Api.Features.Pdf; @@ -18,7 +19,7 @@ public static IServiceCollection AddPdfServices(this IServiceCollection services .ConfigureHttpClient(c => { c.Timeout = TimeSpan.FromSeconds(60); - c.DefaultRequestHeaders.UserAgent.ParseAdd("TextServices/1.0 (+https://github.com/dlcs/text-services)"); + c.DefaultRequestHeaders.UserAgent.ParseAdd(UserAgents.Search); }); return services; diff --git a/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs b/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs index cc20fae..f45aa11 100644 --- a/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs @@ -8,7 +8,7 @@ internal static class AnnotationEndpoints { internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/annotations/manifest/v1/{**id}", async ( + routes.MapGet("/annotations/manifest/v1/{*id:minlength(1)}", async ( string id, ISender sender, IOptions options, @@ -20,7 +20,7 @@ internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRoute return Results.Json(result, contentType: "application/ld+json"); }); - routes.MapGet("/annotations/lines/v1/{n:int}/{**id}", async ( + routes.MapGet("/annotations/lines/v1/{n:int}/{*id:minlength(1)}", async ( int n, string id, ISender sender, IOptions options, @@ -32,7 +32,7 @@ internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRoute return Results.Json(result, contentType: "application/ld+json"); }); - routes.MapGet("/annotations/words/v1/{n:int}/{**id}", async ( + routes.MapGet("/annotations/words/v1/{n:int}/{*id:minlength(1)}", async ( int n, string id, ISender sender, IOptions options, diff --git a/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs b/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs index 30b2ed9..241c3dd 100644 --- a/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs @@ -8,7 +8,7 @@ internal static class AutocompleteEndpoints { internal static IEndpointRouteBuilder MapAutocompleteEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/autocomplete/v1/{**id}", async ( + routes.MapGet("/autocomplete/v1/{*id:minlength(1)}", async ( string id, string? q, ISender sender, IOptions options, @@ -20,7 +20,7 @@ internal static IEndpointRouteBuilder MapAutocompleteEndpoints(this IEndpointRou return Results.Json(result, contentType: "application/ld+json"); }); - routes.MapGet("/autocomplete/v2/{**id}", async ( + routes.MapGet("/autocomplete/v2/{*id:minlength(1)}", async ( string id, string? q, ISender sender, IOptions options, diff --git a/src/TextServices.Search.Api/Features/EndpointHelpers.cs b/src/TextServices.Search.Api/Features/EndpointHelpers.cs index 5e97e58..1298a51 100644 --- a/src/TextServices.Search.Api/Features/EndpointHelpers.cs +++ b/src/TextServices.Search.Api/Features/EndpointHelpers.cs @@ -2,19 +2,6 @@ namespace TextServices.Search.Api.Features; -/// -/// Holds the resolved URL components for a single endpoint request, accounting for -/// X-Forwarded-Host and X-Forwarded-Path proxy headers. -/// -/// -/// Job id to use in generated IIIF URLs. Extracted from X-Forwarded-Path when a -/// whitelisted host is present; otherwise equals the original route id. -/// -/// Absolute URL for this endpoint response (base + route prefix + effective id + query). -/// Absolute URL without query string. Used as the base for child resource IDs (e.g. annotations). -/// Scheme + authority only (no path). Used by TextAugmented to build cross-endpoint service URLs. -internal record ResolvedRequest(string EffectiveId, string SelfUrl, string ResourceUrl, string BaseUrl); - internal static class EndpointHelpers { /// diff --git a/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs b/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs index fcf84ed..25a9ab0 100644 --- a/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs @@ -8,7 +8,7 @@ internal static class FiguresEndpoints { internal static IEndpointRouteBuilder MapFiguresEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/identified/figures/{**id}", async ( + routes.MapGet("/identified/figures/{*id:minlength(1)}", async ( string id, ISender sender, IOptions options, diff --git a/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs b/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs index 00df646..faf4e4c 100644 --- a/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs @@ -8,7 +8,7 @@ internal static class PdfEndpoints { internal static IEndpointRouteBuilder MapPdfEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/pdf/v1/{**id}", async (string id, ISender sender) => + routes.MapGet("/pdf/v1/{*id:minlength(1)}", async (string id, ISender sender) => { id = StripPdfExtension(id); var stream = await sender.Send(new PdfRequest(id)); @@ -16,7 +16,7 @@ internal static IEndpointRouteBuilder MapPdfEndpoints(this IEndpointRouteBuilder return Results.Stream(stream, "application/pdf", enableRangeProcessing: false); }); - routes.MapPost("/pdf/v1/{**id}", async ( + routes.MapPost("/pdf/v1/{*id:minlength(1)}", async ( string id, ISender sender, IOptions options, diff --git a/src/TextServices.Search.Api/Features/Pdf/PdfGenerationService.cs b/src/TextServices.Search.Api/Features/Pdf/PdfGenerationService.cs index 72b65a6..6930362 100644 --- a/src/TextServices.Search.Api/Features/Pdf/PdfGenerationService.cs +++ b/src/TextServices.Search.Api/Features/Pdf/PdfGenerationService.cs @@ -38,14 +38,14 @@ private async Task GenerateAsync(string id, CancellationToken ct) var text = await textStore.LoadText(id); if (text == null) { - logger.LogWarning("PDF generation: no Text artefact for {Id}", id); + logger.LogWarning("No Text artefact for {Id}", id); return; } var manifestJson = await textStore.LoadManifest(id); if (manifestJson == null) { - logger.LogWarning("PDF generation: no Manifest for {Id}", id); + logger.LogWarning("No Manifest for {Id}", id); return; } diff --git a/src/TextServices.Search.Api/Features/PlainText/PlainTextEndpoints.cs b/src/TextServices.Search.Api/Features/PlainText/PlainTextEndpoints.cs index 4599474..14c91df 100644 --- a/src/TextServices.Search.Api/Features/PlainText/PlainTextEndpoints.cs +++ b/src/TextServices.Search.Api/Features/PlainText/PlainTextEndpoints.cs @@ -6,7 +6,7 @@ internal static class PlainTextEndpoints { internal static IEndpointRouteBuilder MapPlainTextEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/text/v1/{**id}", async (string id, ISender sender) => + routes.MapGet("/text/v1/{*id:minlength(1)}", async (string id, ISender sender) => { var result = await sender.Send(new RawTextRequest(id)); if (result == null) return Results.NotFound(); diff --git a/src/TextServices.Search.Api/Features/ResolvedRequest.cs b/src/TextServices.Search.Api/Features/ResolvedRequest.cs new file mode 100644 index 0000000..008e994 --- /dev/null +++ b/src/TextServices.Search.Api/Features/ResolvedRequest.cs @@ -0,0 +1,48 @@ +using TextServices.Infrastructure; + +namespace TextServices.Search.Api.Features; + +/// +/// Holds the resolved URL components for a single endpoint request, accounting for +/// X-Forwarded-Host and X-Forwarded-Path proxy headers. +/// +/// +/// Job id to use in generated IIIF URLs. Extracted from X-Forwarded-Path when a +/// whitelisted host is present; otherwise equals the original route id. +/// +/// Absolute URL for this endpoint response (base + route prefix + effective id + query). +/// Absolute URL without query string. Used as the base for child resource IDs (e.g. annotations). +/// Scheme + authority only (no path). Configured URL or based on x-forwarded-host if valid. +internal record ResolvedRequest(string EffectiveId, string SelfUrl, string ResourceUrl, string BaseUrl); + +/// +/// Extension methods for to ease route generation +/// +internal static class ResolvedRequestExtensions +{ + public static string SearchV1Url(this ResolvedRequest r) => SearchApiRoutes.SearchV1(r.BaseUrl, r.EffectiveId); + public static string SearchV2Url(this ResolvedRequest r) => SearchApiRoutes.SearchV2(r.BaseUrl, r.EffectiveId); + + public static string AutocompleteV1Url(this ResolvedRequest r) => + SearchApiRoutes.AutocompleteV1(r.BaseUrl, r.EffectiveId); + + public static string AutocompleteV2Url(this ResolvedRequest r) => + SearchApiRoutes.AutocompleteV2(r.BaseUrl, r.EffectiveId); + + public static string FullTextUrl(this ResolvedRequest r) => SearchApiRoutes.FullText(r.BaseUrl, r.EffectiveId); + public static string PdfUrl(this ResolvedRequest r) => SearchApiRoutes.Pdf(r.BaseUrl, r.EffectiveId); + + public static string TextAugmentedUrl(this ResolvedRequest r) => + SearchApiRoutes.TextAugmented(r.BaseUrl, r.EffectiveId); + + public static string AnnotationsManifestUrl(this ResolvedRequest r) => + SearchApiRoutes.AnnotationsManifest(r.BaseUrl, r.EffectiveId); + + public static string AnnotationsLinesUrl(this ResolvedRequest r, int i) => + SearchApiRoutes.AnnotationsLines(r.BaseUrl, r.EffectiveId, i); + + public static string AnnotationsWordsUrl(this ResolvedRequest r, int i) => + SearchApiRoutes.AnnotationsWords(r.BaseUrl, r.EffectiveId, i); + + public static string FiguresUrl(this ResolvedRequest r) => SearchApiRoutes.Figures(r.BaseUrl, r.EffectiveId); +} diff --git a/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs b/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs index c6b480b..78045ef 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs @@ -8,7 +8,7 @@ internal static class SearchEndpoints { internal static IEndpointRouteBuilder MapSearchEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/search/v1/{**id}", async ( + routes.MapGet("/search/v1/{*id:minlength(1)}", async ( string id, string? q, ISender sender, IOptions options, @@ -21,7 +21,7 @@ internal static IEndpointRouteBuilder MapSearchEndpoints(this IEndpointRouteBuil return Results.Json(result, contentType: "application/ld+json"); }); - routes.MapGet("/search/v2/{**id}", async ( + routes.MapGet("/search/v2/{*id:minlength(1)}", async ( string id, string? q, ISender sender, IOptions options, diff --git a/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs b/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs index db9bf17..0d976ff 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs @@ -4,21 +4,30 @@ namespace TextServices.Search.Api.Features.Search; -public abstract class SearchHandlerBase(ITextCache cache) +public abstract class SearchHandlerBase(ITextCache cache, ILogger logger) { protected async Task HandleCore(string id, string query, string selfUrl, string resourceUrl, CancellationToken ct) { - if (!await cache.IsEnabledAsync(id, JobServices.Search, ct)) return default; + if (!await cache.IsEnabledAsync(id, JobServices.Search, ct)) + { + logger.LogDebug("Search is disabled: {Id}", id); + return default; + } - if (string.IsNullOrWhiteSpace(query)) - return EmptyQueryResponse(selfUrl, resourceUrl); + if (string.IsNullOrWhiteSpace(query)) return EmptyQueryResponse(selfUrl); var text = await cache.GetTextAsync(id, ct); - if (text == null) return default; + if (text == null) + { + logger.LogInformation("No text found: {Id}", id); + return default; + } + + logger.LogDebug("Searching {Id} for {Query}", id, query); return BuildResponse(text, text.Search(query), selfUrl, resourceUrl); } - protected abstract TResponse EmptyQueryResponse(string selfUrl, string resourceUrl); + protected abstract TResponse EmptyQueryResponse(string selfUrl); protected abstract TResponse BuildResponse(Text text, List rects, string selfUrl, string resourceUrl); } diff --git a/src/TextServices.Search.Api/Features/Search/SearchQuery.cs b/src/TextServices.Search.Api/Features/Search/SearchQuery.cs index 972fd4e..18d38de 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchQuery.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchQuery.cs @@ -5,18 +5,20 @@ namespace TextServices.Search.Api.Features.Search; -public record SearchRequest(string Id, string Query, string SelfUrl, string ResourceUrl) : IRequest; +public record SearchRequest(string Id, string Query, string SelfUrl, string ResourceUrl) + : IRequest; -public class SearchHandler(ITextCache cache) - : SearchHandlerBase(cache), IRequestHandler +public class SearchHandler(ITextCache cache, ILogger logger) + : SearchHandlerBase(cache, logger), IRequestHandler { public Task Handle(SearchRequest request, CancellationToken ct) => HandleCore(request.Id, request.Query, request.SelfUrl, request.ResourceUrl, ct); - protected override SearchAnnotationList EmptyQueryResponse(string selfUrl, string resourceUrl) => + protected override SearchAnnotationList EmptyQueryResponse(string selfUrl) => new() { Id = selfUrl, Within = new SearchLayer { Total = 0 }, Resources = [], Hits = [] }; - protected override SearchAnnotationList BuildResponse(Text text, List rects, string selfUrl, string resourceUrl) + protected override SearchAnnotationList BuildResponse(Text text, List rects, string selfUrl, + string resourceUrl) { var resources = new List(rects.Count); var hits = new List(); diff --git a/src/TextServices.Search.Api/Features/Search/SearchV2Query.cs b/src/TextServices.Search.Api/Features/Search/SearchV2Query.cs index ae7c0dd..0a5ae98 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchV2Query.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchV2Query.cs @@ -6,18 +6,20 @@ namespace TextServices.Search.Api.Features.Search; -public record SearchV2Request(string Id, string Query, string SelfUrl, string ResourceUrl) : IRequest; +public record SearchV2Request(string Id, string Query, string SelfUrl, string ResourceUrl) + : IRequest; -public class SearchV2Handler(ITextCache cache) - : SearchHandlerBase(cache), IRequestHandler +public class SearchV2Handler(ITextCache cache, ILogger logger) + : SearchHandlerBase(cache, logger), IRequestHandler { public Task Handle(SearchV2Request request, CancellationToken ct) => HandleCore(request.Id, request.Query, request.SelfUrl, request.ResourceUrl, ct); - protected override SearchAnnotationPageV2 EmptyQueryResponse(string selfUrl, string resourceUrl) => + protected override SearchAnnotationPageV2 EmptyQueryResponse(string selfUrl) => new() { Id = selfUrl, Items = [], Annotations = null }; - protected override SearchAnnotationPageV2 BuildResponse(Text text, List rects, string selfUrl, string resourceUrl) + protected override SearchAnnotationPageV2 BuildResponse(Text text, List rects, string selfUrl, + string resourceUrl) { var items = new List(rects.Count); var contexts = new List(); @@ -64,9 +66,8 @@ protected override SearchAnnotationPageV2 BuildResponse(Text text, List + new() { Id = id, Target = new SpecificResourceTarget @@ -124,5 +123,4 @@ private static ContextualizingAnnotation MakeContextualizing( ], }, }; - } } diff --git a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedEndpoints.cs b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedEndpoints.cs index d008cb2..9cf9311 100644 --- a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedEndpoints.cs +++ b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedEndpoints.cs @@ -8,14 +8,14 @@ internal static class TextAugmentedEndpoints { internal static IEndpointRouteBuilder MapTextAugmentedEndpoints(this IEndpointRouteBuilder routes) { - routes.MapGet("/text-augmented/v3/{**id}", async ( + routes.MapGet("/text-augmented/v3/{*id:minlength(1)}", async ( string id, ISender sender, IOptions options, HttpContext ctx) => { var resolved = EndpointHelpers.Resolve(options.Value, ctx, "text-augmented/v3/", id); - var result = await sender.Send(new TextAugmentedRequest(id, resolved.SelfUrl, resolved.BaseUrl, UrlId: resolved.EffectiveId)); + var result = await sender.Send(new TextAugmentedRequest(id, resolved)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); diff --git a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs deleted file mode 100644 index c295965..0000000 --- a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs +++ /dev/null @@ -1,239 +0,0 @@ -using System.Text.Json.Nodes; -using MediatR; -using TextServices.Search.Api.Services; -using TextServices.Storage; - -namespace TextServices.Search.Api.Features.TextAugmented; - -/// -/// Returns the stored IIIF Manifest JSON decorated with IIIF Search service descriptors -/// for both v2 and v1. Services use the IIIF Presentation 3 id/type conventions: -/// type "SearchService2" / "AutoCompleteService2" (IIIF Search 2) -/// type "SearchService1" / "AutoCompleteService1" (IIIF Search 1, legacy) -/// No @context is emitted inside service blocks — it belongs only at document level. -/// -/// Storage key used to load artefacts from the text store. -/// -/// Id to use when generating IIIF service URLs. Differs from when the -/// request arrived via a proxy that rewrites the path (X-Forwarded-Path). Defaults to -/// when null. -/// -public record TextAugmentedRequest(string Id, string SelfUrl, string SearchBaseUrl, string? UrlId = null) - : IRequest; - -public class TextAugmentedHandler(ITextStore textStore, ITextCache textCache) - : IRequestHandler -{ - public async Task Handle(TextAugmentedRequest request, CancellationToken ct) - { - if (!await textCache.IsEnabledAsync(request.Id, JobServices.TextAugmented, ct)) return null; - var json = await textStore.LoadManifest(request.Id); - if (json == null) return null; - - var node = JsonNode.Parse(json); - if (node is not JsonObject manifest) return null; - - // Replace @id / id with the text-augmented URL for this manifest. - if (manifest.ContainsKey("@id")) - manifest["@id"] = request.SelfUrl; - else - manifest["id"] = request.SelfUrl; - - // Build service descriptors using Presentation 3 id/type conventions. - // v2 is listed first; v1 follows for backward-compatible clients. - - // TODO - de-duplicate when adding services - var base_ = request.SearchBaseUrl; - var id = request.UrlId ?? request.Id; - - var searchServiceV2 = new JsonObject - { - ["id"] = $"{base_}/search/v2/{id}", // TODO - can we build these from a central place? - ["type"] = "SearchService2", - ["service"] = new JsonArray(new JsonObject - { - ["id"] = $"{base_}/autocomplete/v2/{id}", - ["type"] = "AutoCompleteService2", - }), - }; - - var searchServiceV1 = new JsonObject - { - ["id"] = $"{base_}/search/v1/{id}", - ["type"] = "SearchService1", - ["profile"] = "http://iiif.io/api/search/1/search", - ["service"] = new JsonArray(new JsonObject - { - ["id"] = $"{base_}/autocomplete/v1/{id}", - ["type"] = "AutoCompleteService1", - ["profile"] = "http://iiif.io/api/search/1/autocomplete", - }), - }; - - // Insert v2 then v1 at position 0 so v2 appears first. - // Existing services (e.g. IIIF Image services) are pushed down, not displaced. - if (manifest["service"] is JsonArray existingArray) - { - existingArray.Insert(0, searchServiceV1); - existingArray.Insert(0, searchServiceV2); - } - else if (manifest["service"] is JsonObject existingObject) - { - // Spec allows service to be a single object — promote to array. - manifest["service"] = new JsonArray( - searchServiceV2, searchServiceV1, existingObject.DeepClone()); - } - else - { - manifest["service"] = new JsonArray(searchServiceV2, searchServiceV1); - } - - // ---- rendering links (PDF + plain text) --------------------------------- - // Plain text is always added when text artefacts exist. - // PDF is only added when at least one canvas is image-based (IsTemporalContent == false); - // temporal-only manifests (VTT/audio/video) have no page images to render into a PDF. - var text = await textCache.GetTextAsync(request.Id, ct); - if (text != null) - { - var hasImageCanvases = text.Images.Any(img => !img.IsTemporalContent); - - var textRef = new JsonObject - { - ["id"] = $"{base_}/text/v1/{id}", - ["type"] = "Text", - ["label"] = new JsonObject { ["en"] = new JsonArray("View as plain text") }, - ["format"] = "text/plain", - }; - - if (manifest["rendering"] is JsonArray existingRendering) - { - existingRendering.Insert(0, textRef); - if (hasImageCanvases) - { - var pdfRef = BuildPdfRef(base_, id); - existingRendering.Insert(0, pdfRef); - } - } - else if (manifest["rendering"] is JsonObject singleRendering) - { - manifest["rendering"] = hasImageCanvases - ? new JsonArray(BuildPdfRef(base_, id), textRef, singleRendering.DeepClone()) - : new JsonArray(textRef, singleRendering.DeepClone()); - } - else - { - manifest["rendering"] = hasImageCanvases - ? new JsonArray(BuildPdfRef(base_, id), textRef) - : new JsonArray(textRef); - } - } - - // ---- per-canvas line and word annotation page references ---------------- - // Inject line-level and word-level annotation page links into each canvas's - // annotations array so harvesting clients can discover and fetch them. - // Only canvases that actually have words are decorated; sparse canvases are skipped. - if (text != null && manifest["items"] is JsonArray canvases) - { - // Build a set of canvas indices that have at least one word, in O(words). - var canvasesWithWords = text.Words.Values.Select(w => w.Idx).ToHashSet(); - - for (var i = 0; i < canvases.Count && i < text.Images.Length; i++) - { - if (!canvasesWithWords.Contains(i)) continue; - if (canvases[i] is not JsonObject canvas) continue; - - var linesRef = new JsonObject - { - ["id"] = $"{base_}/annotations/lines/v1/{i}/{id}", - ["type"] = "AnnotationPage", - ["label"] = new JsonObject { ["en"] = new JsonArray("Line-level transcription") }, - }; - var wordsRef = new JsonObject - { - ["id"] = $"{base_}/annotations/words/v1/{i}/{id}", - ["type"] = "AnnotationPage", - ["label"] = new JsonObject { ["en"] = new JsonArray("Word-level transcription") }, - }; - - if (canvas["annotations"] is JsonArray existingAnnos) - { - existingAnnos.Insert(0, wordsRef); - existingAnnos.Insert(0, linesRef); - } - else if (canvas["annotations"] is JsonObject singleAnno) - { - canvas["annotations"] = new JsonArray( - linesRef, wordsRef, singleAnno.DeepClone()); - } - else - { - canvas["annotations"] = new JsonArray(linesRef, wordsRef); - } - } - } - - // ---- manifest-level line annotations reference -------------------------- - // If the builder stored an annotations.json (all canvases, line granularity), - // add a manifest-level annotations reference so clients can discover it. - var annotationsUrl = $"{base_}/annotations/manifest/v1/{id}"; - var annotationsJson = await textStore.LoadAnnotations(request.Id); - if (annotationsJson != null) - { - var annotationsRef = new JsonObject - { - ["id"] = annotationsUrl, - ["type"] = "AnnotationPage", - ["profile"] = "https://dlcs.io/profiles/all-text", - ["label"] = new JsonObject { ["en"] = new JsonArray("Text of all canvases") }, - }; - - if (manifest["annotations"] is JsonArray existingAnnos) - existingAnnos.Insert(0, annotationsRef); - else if (manifest["annotations"] is JsonObject singleAnno) - manifest["annotations"] = new JsonArray(annotationsRef, singleAnno.DeepClone()); - else - manifest["annotations"] = new JsonArray(annotationsRef); - } - - // ---- figures annotation page reference ---------------------------------- - // If the builder stored a figures.json (ComposedBlocks with non-zero area), - // add a manifest-level annotations reference so clients can discover it. - var figuresUrl = $"{base_}/identified/figures/{id}"; - var figuresJson = await textStore.LoadFigures(request.Id); - if (figuresJson != null) - { - var figuresRef = new JsonObject - { - ["id"] = figuresUrl, - ["type"] = "AnnotationPage", - ["label"] = new JsonObject - { - ["en"] = new JsonArray("Figures, tables and illustrations"), - }, - }; - - if (manifest["annotations"] is JsonArray existingAnnos) - { - existingAnnos.Add(figuresRef); - } - else if (manifest["annotations"] is JsonObject singleAnno) - { - manifest["annotations"] = new JsonArray(singleAnno.DeepClone(), figuresRef); - } - else - { - manifest["annotations"] = new JsonArray(figuresRef); - } - } - - return manifest; - } - - private static JsonObject BuildPdfRef(string baseUrl, string id) => new() - { - ["id"] = $"{baseUrl}/pdf/v1/{id}", - ["type"] = "Text", - ["label"] = new JsonObject { ["en"] = new JsonArray("Download as PDF") }, - ["format"] = "application/pdf", - }; -} diff --git a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedRequest.cs b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedRequest.cs new file mode 100644 index 0000000..4cc4ef0 --- /dev/null +++ b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedRequest.cs @@ -0,0 +1,308 @@ +using System.Text.Json.Nodes; +using MediatR; +using TextServices.Core.Models; +using TextServices.Search.Api.Services; +using TextServices.Storage; + + +namespace TextServices.Search.Api.Features.TextAugmented; + +/// +/// Returns the stored IIIF Manifest JSON decorated with IIIF Search service descriptors +/// for both v2 and v1. Services use the IIIF Presentation 3 id/type conventions: +/// type "SearchService2" / "AutoCompleteService2" (IIIF Search 2) +/// type "SearchService1" / "AutoCompleteService1" (IIIF Search 1, legacy) +/// No @context is emitted inside service blocks — it belongs only at document level. +/// +/// Storage key used to load artefacts from the text store. +/// +/// Resolved URL components for this request. is used +/// for generated IIIF service URLs; it may differ from when the request arrived +/// via a proxy that rewrites the path (X-Forwarded-Path). +/// +internal record TextAugmentedRequest(string Id, ResolvedRequest Resolved) + : IRequest; + +internal class TextAugmentedHandler(ITextStore textStore, ITextCache textCache, ILogger logger) + : IRequestHandler +{ + public async Task Handle(TextAugmentedRequest request, CancellationToken ct) + { + if (!await textCache.IsEnabledAsync(request.Id, JobServices.TextAugmented, ct)) + { + logger.LogDebug("Text augmentation is not enabled: {Id}", request.Id); + return null; + } + var json = await textStore.LoadManifest(request.Id); + if (json == null) + { + logger.LogDebug("Manifest not found: {Id}", request.Id); + return null; + } + + var node = JsonNode.Parse(json); + if (node is not JsonObject manifest) + { + logger.LogDebug("Manifest not JSON: {Id}", request.Id); + return null; + } + + var text = await textCache.GetTextAsync(request.Id, ct); + + var resolvedRequestUris = request.Resolved; + ReplaceSelfUrl(manifest, resolvedRequestUris.SelfUrl); + if (text != null) + { + InjectSearchServices(manifest, resolvedRequestUris); + InjectRenderingLinks(manifest, resolvedRequestUris, text); + InjectCanvasAnnotationRefs(manifest, resolvedRequestUris, text); + } + else + { + logger.LogDebug("Text not found: {Id}", request.Id); + } + + await InjectManifestAnnotationsRefAsync(manifest, resolvedRequestUris, request.Id); + await InjectFiguresRefAsync(manifest, resolvedRequestUris, request.Id); + + return manifest; + } + + private static void ReplaceSelfUrl(JsonObject manifest, string selfUrl) + { + if (manifest.ContainsKey("@id")) + manifest["@id"] = selfUrl; + else + manifest["id"] = selfUrl; + } + + private void InjectSearchServices(JsonObject manifest, ResolvedRequest resolved) + { + logger.LogDebug("Adding search-services: {Id}", resolved.EffectiveId); + var searchServiceV2 = new JsonObject + { + ["id"] = resolved.SearchV2Url(), + ["type"] = "SearchService2", + ["service"] = new JsonArray(new JsonObject + { + ["id"] = resolved.AutocompleteV2Url(), + ["type"] = "AutoCompleteService2", + }), + }; + + var searchServiceV1 = new JsonObject + { + ["id"] = resolved.SearchV1Url(), + ["type"] = "SearchService1", + ["profile"] = "http://iiif.io/api/search/1/search", + ["service"] = new JsonArray(new JsonObject + { + ["id"] = resolved.AutocompleteV1Url(), + ["type"] = "AutoCompleteService1", + ["profile"] = "http://iiif.io/api/search/1/autocomplete", + }), + }; + + // Insert v2 then v1 at position 0 so v2 appears first. + // Existing services (e.g. IIIF Image services) are pushed down, not displaced. + // Deduplication: skip insertion when a service with the same "id" already exists. + if (manifest["service"] is JsonArray existingArray) + { + AddIfNew(existingArray, searchServiceV1); + AddIfNew(existingArray, searchServiceV2); + } + else if (manifest["service"] is JsonObject existingObject) + { + // Spec allows service to be a single object — promote to array. + var arr = new JsonArray(existingObject.DeepClone()); + AddIfNew(arr, searchServiceV1); + AddIfNew(arr, searchServiceV2); + manifest["service"] = arr; + } + else + { + manifest["service"] = new JsonArray(searchServiceV2, searchServiceV1); + } + + AppendContext(manifest, "http://iiif.io/api/search/2/context.json"); + AppendContext(manifest, "http://iiif.io/api/search/1/context.json"); + } + + private void InjectRenderingLinks(JsonObject manifest, ResolvedRequest resolved, Text text) + { + logger.LogDebug("Adding rendering: {Id}", resolved.EffectiveId); + + // PDF is only added when at least one canvas is image-based (IsTemporalContent == false); + // temporal-only manifests (VTT/audio/video) have no page images to render into a PDF. + var hasImageCanvases = text.Images.Any(img => !img.IsTemporalContent); + + var textRef = new JsonObject + { + ["id"] = resolved.FullTextUrl(), + ["type"] = "Text", + ["label"] = new JsonObject { ["en"] = new JsonArray("View as plain text") }, + ["format"] = "text/plain", + }; + + if (manifest["rendering"] is JsonArray existingRendering) + { + AddIfNew(existingRendering, textRef); + if (hasImageCanvases) AddIfNew(existingRendering, BuildPdfRef(resolved)); + } + else if (manifest["rendering"] is JsonObject singleRendering) + { + var arr = new JsonArray(singleRendering.DeepClone()); + AddIfNew(arr, textRef); + if (hasImageCanvases) AddIfNew(arr, BuildPdfRef(resolved)); + manifest["rendering"] = arr; + } + else + { + manifest["rendering"] = hasImageCanvases + ? new JsonArray(BuildPdfRef(resolved), textRef) + : new JsonArray(textRef); + } + } + + private void InjectCanvasAnnotationRefs(JsonObject manifest, ResolvedRequest resolved, Text text) + { + if (manifest["items"] is not JsonArray canvases) return; + + logger.LogDebug("Adding canvas annotations: {Id}", resolved.EffectiveId); + + // Build a set of canvas indices that have at least one word, in O(words). + var canvasesWithWords = text.Words.Values.Select(w => w.Idx).ToHashSet(); + + for (var i = 0; i < canvases.Count && i < text.Images.Length; i++) + { + if (!canvasesWithWords.Contains(i)) continue; + if (canvases[i] is not JsonObject canvas) continue; + + var linesRef = new JsonObject + { + ["id"] = resolved.AnnotationsLinesUrl(i), + ["type"] = "AnnotationPage", + ["label"] = new JsonObject { ["en"] = new JsonArray("Line-level transcription") }, + }; + var wordsRef = new JsonObject + { + ["id"] = resolved.AnnotationsWordsUrl(i), + ["type"] = "AnnotationPage", + ["label"] = new JsonObject { ["en"] = new JsonArray("Word-level transcription") }, + }; + + if (canvas["annotations"] is JsonArray existingAnnos) + { + AddIfNew(existingAnnos, wordsRef); + AddIfNew(existingAnnos, linesRef); + } + else if (canvas["annotations"] is JsonObject singleAnno) + { + var arr = new JsonArray(singleAnno.DeepClone()); + AddIfNew(arr, wordsRef); + AddIfNew(arr, linesRef); + canvas["annotations"] = arr; + } + else + { + canvas["annotations"] = new JsonArray(linesRef, wordsRef); + } + } + } + + private async Task InjectManifestAnnotationsRefAsync(JsonObject manifest, ResolvedRequest resolved, string key) + { + var annotationsJson = await textStore.LoadAnnotations(key); + if (annotationsJson == null) return; + + logger.LogDebug("Adding Manifest annotations: {Id}", resolved.EffectiveId); + + var annotationsRef = new JsonObject + { + ["id"] = resolved.AnnotationsManifestUrl(), + ["type"] = "AnnotationPage", + ["profile"] = "https://dlcs.io/profiles/all-text", + ["label"] = new JsonObject { ["en"] = new JsonArray("Text of all canvases") }, + }; + + if (manifest["annotations"] is JsonArray existingAnnos) + { + AddIfNew(existingAnnos, annotationsRef); + } + else if (manifest["annotations"] is JsonObject singleAnno) + { + var arr = new JsonArray(singleAnno.DeepClone()); + AddIfNew(arr, annotationsRef); + manifest["annotations"] = arr; + } + else + { + manifest["annotations"] = new JsonArray(annotationsRef); + } + } + + private async Task InjectFiguresRefAsync(JsonObject manifest, ResolvedRequest resolved, string key) + { + var figuresJson = await textStore.LoadFigures(key); + if (figuresJson == null) return; + + logger.LogDebug("Adding figures: {Id}", resolved.EffectiveId); + + var figuresRef = new JsonObject + { + ["id"] = resolved.FiguresUrl(), + ["type"] = "AnnotationPage", + ["label"] = new JsonObject + { + ["en"] = new JsonArray("Figures, tables and illustrations"), + }, + }; + + if (manifest["annotations"] is JsonArray existingAnnos) + { + AddIfNew(existingAnnos, figuresRef, prepend: false); + } + else if (manifest["annotations"] is JsonObject singleAnno) + { + var arr = new JsonArray(singleAnno.DeepClone()); + AddIfNew(arr, figuresRef, prepend: false); + manifest["annotations"] = arr; + } + else + { + manifest["annotations"] = new JsonArray(figuresRef); + } + } + + private static void AddIfNew(JsonArray array, JsonObject item, bool prepend = true) + { + if (array.Any(n => n?["id"]?.GetValue() == item["id"]?.GetValue())) return; + if (prepend) array.Insert(0, item); + else array.Add(item); + } + + private static void AppendContext(JsonObject manifest, string url) + { + if (manifest["@context"] is JsonArray arr) + { + if (!arr.Any(n => n?.GetValue() == url)) + arr.Add(url); + } + else if (manifest["@context"] is JsonValue str) + { + manifest["@context"] = new JsonArray(str.GetValue(), url); + } + else + { + manifest["@context"] = new JsonArray(url); + } + } + + private static JsonObject BuildPdfRef(ResolvedRequest resolved) => new() + { + ["id"] = resolved.PdfUrl(), + ["type"] = "Text", + ["label"] = new JsonObject { ["en"] = new JsonArray("Download as PDF") }, + ["format"] = "application/pdf", + }; +} diff --git a/src/TextServices.Search.Api/Program.cs b/src/TextServices.Search.Api/Program.cs index 5546457..7b4a119 100644 --- a/src/TextServices.Search.Api/Program.cs +++ b/src/TextServices.Search.Api/Program.cs @@ -1,4 +1,5 @@ using System.IO.Compression; +using Amazon.S3; using AsyncKeyedLock; using Microsoft.AspNetCore.ResponseCompression; using Microsoft.Extensions.Options; @@ -60,11 +61,30 @@ // ---- Storage ---------------------------------------------------------------- +builder.Services.AddDefaultAWSOptions(builder.Configuration.GetAWSOptions()); + +if (!string.IsNullOrEmpty(builder.Configuration["TextServices:Storage:S3:BucketName"])) +{ + Log.Debug("Using S3 storage for text artefacts"); + builder.Services.AddAWSService(); +} + builder.Services.AddSingleton(sp => - new FileSystemTextStore(new FileSystemTextStoreOptions +{ + var opts = sp.GetRequiredService>().Value; + if (!string.IsNullOrEmpty(opts.Storage.S3.BucketName)) { - RootPath = sp.GetRequiredService>().Value.StorageRootPath - })); + var s3Opts = Options.Create(new S3TextStoreOptions + { + BucketName = opts.Storage.S3.BucketName, + KeyPrefix = opts.Storage.S3.KeyPrefix + }); + return ActivatorUtilities.CreateInstance(sp, s3Opts); + } + return ActivatorUtilities.CreateInstance( + sp, + new FileSystemTextStoreOptions { RootPath = opts.Storage.FileSystem.RootPath }); +}); // ITextStore is also injected directly into TextAugmentedHandler (manifest is plain JSON, // not routed through the Text/AutoComplete cache). diff --git a/src/TextServices.Search.Api/Services/CacheEndpoints.cs b/src/TextServices.Search.Api/Services/CacheEndpoints.cs index a20a7b4..31407f8 100644 --- a/src/TextServices.Search.Api/Services/CacheEndpoints.cs +++ b/src/TextServices.Search.Api/Services/CacheEndpoints.cs @@ -4,7 +4,7 @@ internal static class CacheEndpoints { internal static IEndpointRouteBuilder MapCacheEndpoints(this IEndpointRouteBuilder routes) { - routes.MapDelete("/cache/v1/{**id}", (string id, ITextCache textCache) => + routes.MapDelete("/cache/v1/{*id:minlength(1)}", (string id, ITextCache textCache) => { textCache.Invalidate(id); return Results.NoContent(); diff --git a/src/TextServices.Search.Api/TextServices.Search.Api.csproj b/src/TextServices.Search.Api/TextServices.Search.Api.csproj index 3b99240..9a111c4 100644 --- a/src/TextServices.Search.Api/TextServices.Search.Api.csproj +++ b/src/TextServices.Search.Api/TextServices.Search.Api.csproj @@ -8,7 +8,9 @@ - + + + diff --git a/src/TextServices.Search.Api/appsettings.Development-Example.json b/src/TextServices.Search.Api/appsettings.Development-Example.json new file mode 100644 index 0000000..71d8f4a --- /dev/null +++ b/src/TextServices.Search.Api/appsettings.Development-Example.json @@ -0,0 +1,11 @@ +{ + "TextServices": { + "BaseUrl": "http://localhost:5294", + "AllowFileImageProxy": true, + "Storage": { + "FileSystem": { + "RootPath": "C:/textservices-data" + } + } + } +} diff --git a/src/TextServices.Search.Api/appsettings.Test.json b/src/TextServices.Search.Api/appsettings.Test.json new file mode 100644 index 0000000..1ab060f --- /dev/null +++ b/src/TextServices.Search.Api/appsettings.Test.json @@ -0,0 +1,9 @@ +{ + "TextServices": { + "Storage": { + "S3": { + "BucketName": "" + } + } + } +} diff --git a/src/TextServices.Search.Api/appsettings.json b/src/TextServices.Search.Api/appsettings.json index fbc7a4d..de9217e 100644 --- a/src/TextServices.Search.Api/appsettings.json +++ b/src/TextServices.Search.Api/appsettings.json @@ -18,12 +18,9 @@ ] }, "AllowedHosts": "*", - "CorsAllowedOrigins": [ "http://localhost:5100" ], "TextServices": { - "BaseUrl": "", "CacheSlidingExpirationMinutes": 30, "CacheAbsoluteExpirationHours": 4, - "CacheMaxEntries": 20, - "StorageRootPath": "C:/textservices-data" + "CacheMaxEntries": 20 } } diff --git a/src/TextServices.Storage/FileSystemTextStore.cs b/src/TextServices.Storage/FileSystemTextStore.cs index 46c55a7..6052709 100644 --- a/src/TextServices.Storage/FileSystemTextStore.cs +++ b/src/TextServices.Storage/FileSystemTextStore.cs @@ -1,3 +1,4 @@ +using Microsoft.Extensions.Logging; using ProtoBuf; using TextServices.Core.Models; @@ -11,7 +12,7 @@ namespace TextServices.Storage; /// Job key segments (split on /) become nested subdirectories, so the key /// "2/books/my-book" is stored under {RootPath}/2/books/my-book/. /// -public class FileSystemTextStore : ITextStore +public class FileSystemTextStore(FileSystemTextStoreOptions options, ILogger logger) : ITextStore { private const string TextFileName = "text.bin"; private const string AutoCompleteFileName = "autocomplete.bin"; @@ -23,12 +24,7 @@ public class FileSystemTextStore : ITextStore private const string CapabilitiesFileName = "capabilities.json"; private const string PageSequenceFileName = "pagesequence.json"; - private readonly string _rootPath; - - public FileSystemTextStore(FileSystemTextStoreOptions options) - { - _rootPath = options.RootPath; - } + private readonly string _rootPath = options.RootPath; /// public async Task SaveText(string key, Text text) @@ -37,16 +33,22 @@ public async Task SaveText(string key, Text text) EnsureDirectory(path); await using var stream = File.Create(path); Serializer.Serialize(stream, text); + logger.LogDebug("Saved {Type} for '{Key}'", TextFileName, key); } /// public Task LoadText(string key) { var path = GetPath(key, TextFileName); - if (!File.Exists(path)) return Task.FromResult(null); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", TextFileName, key); + return Task.FromResult(null); + } using var stream = File.OpenRead(path); var text = Serializer.Deserialize(stream); + logger.LogDebug("Loaded {Type} for '{Key}'", TextFileName, key); return Task.FromResult(text); } @@ -57,16 +59,22 @@ public async Task SaveAutoComplete(string key, AutoComplete autoComplete) EnsureDirectory(path); await using var stream = File.Create(path); Serializer.Serialize(stream, autoComplete); + logger.LogDebug("Saved {Type} for '{Key}'", AutoCompleteFileName, key); } /// public Task LoadAutoComplete(string key) { var path = GetPath(key, AutoCompleteFileName); - if (!File.Exists(path)) return Task.FromResult(null); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", AutoCompleteFileName, key); + return Task.FromResult(null); + } using var stream = File.OpenRead(path); var ac = Serializer.Deserialize(stream); + logger.LogDebug("Loaded {Type} for '{Key}'", AutoCompleteFileName, key); return Task.FromResult(ac); } @@ -76,14 +84,21 @@ public async Task SaveManifest(string key, string json) var path = GetPath(key, ManifestFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, json); + logger.LogDebug("Saved {Type} for '{Key}'", ManifestFileName, key); } /// public async Task LoadManifest(string key) { var path = GetPath(key, ManifestFileName); - if (!File.Exists(path)) return null; - return await File.ReadAllTextAsync(path); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", ManifestFileName, key); + return null; + } + var result = await File.ReadAllTextAsync(path); + logger.LogDebug("Loaded {Type} for '{Key}'", ManifestFileName, key); + return result; } /// @@ -92,14 +107,21 @@ public async Task SaveRawText(string key, string rawText) var path = GetPath(key, RawTextFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, rawText); + logger.LogDebug("Saved {Type} for '{Key}'", RawTextFileName, key); } /// public async Task LoadRawText(string key) { var path = GetPath(key, RawTextFileName); - if (!File.Exists(path)) return null; - return await File.ReadAllTextAsync(path); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", RawTextFileName, key); + return null; + } + var result = await File.ReadAllTextAsync(path); + logger.LogDebug("Loaded {Type} for '{Key}'", RawTextFileName, key); + return result; } /// @@ -109,13 +131,19 @@ public async Task SavePdf(string key, Stream pdfStream) EnsureDirectory(path); await using var file = File.Create(path); await pdfStream.CopyToAsync(file); + logger.LogDebug("Saved {Type} for '{Key}'", PdfFileName, key); } /// public Task LoadPdf(string key) { var path = GetPath(key, PdfFileName); - if (!File.Exists(path)) return Task.FromResult(null); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", PdfFileName, key); + return Task.FromResult(null); + } + logger.LogDebug("Loaded {Type} for '{Key}'", PdfFileName, key); return Task.FromResult(File.OpenRead(path)); } @@ -125,14 +153,21 @@ public async Task SaveFigures(string key, string json) var path = GetPath(key, FiguresFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, json); + logger.LogDebug("Saved {Type} for '{Key}'", FiguresFileName, key); } /// public async Task LoadFigures(string key) { var path = GetPath(key, FiguresFileName); - if (!File.Exists(path)) return null; - return await File.ReadAllTextAsync(path); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", FiguresFileName, key); + return null; + } + var result = await File.ReadAllTextAsync(path); + logger.LogDebug("Loaded {Type} for '{Key}'", FiguresFileName, key); + return result; } /// @@ -141,21 +176,30 @@ public async Task SaveAnnotations(string key, string json) var path = GetPath(key, AnnotationsFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, json); + logger.LogDebug("Saved {Type} for '{Key}'", AnnotationsFileName, key); } /// public async Task LoadAnnotations(string key) { var path = GetPath(key, AnnotationsFileName); - if (!File.Exists(path)) return null; - return await File.ReadAllTextAsync(path); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", AnnotationsFileName, key); + return null; + } + var result = await File.ReadAllTextAsync(path); + logger.LogDebug("Loaded {Type} for '{Key}'", AnnotationsFileName, key); + return result; } /// public Task Exists(string key) { var path = GetPath(key, TextFileName); - return Task.FromResult(File.Exists(path)); + var exists = File.Exists(path); + logger.LogDebug("Artefact {Exists} for '{Key}'", exists, key); + return Task.FromResult(exists); } /// @@ -164,14 +208,20 @@ public async Task SaveCapabilities(string key, int services) var path = GetPath(key, CapabilitiesFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, services.ToString()); + logger.LogDebug("Saved {Type} for '{Key}'", CapabilitiesFileName, key); } /// public async Task LoadCapabilities(string key) { var path = GetPath(key, CapabilitiesFileName); - if (!File.Exists(path)) return null; + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", CapabilitiesFileName, key); + return null; + } var text = await File.ReadAllTextAsync(path); + logger.LogDebug("Loaded {Type} for '{Key}'", CapabilitiesFileName, key); return int.TryParse(text.Trim(), out var value) ? value : null; } @@ -181,13 +231,19 @@ public async Task SavePageSequence(string key, string json) var path = GetPath(key, PageSequenceFileName); EnsureDirectory(path); await File.WriteAllTextAsync(path, json); + logger.LogDebug("Saved {Type} for '{Key}'", PageSequenceFileName, key); } /// public Task LoadPageSequence(string key) { var path = GetPath(key, PageSequenceFileName); - if (!File.Exists(path)) return Task.FromResult(null); + if (!File.Exists(path)) + { + logger.LogDebug("{Type} not found for '{Key}'", PageSequenceFileName, key); + return Task.FromResult(null); + } + logger.LogDebug("Loaded {Type} for '{Key}'", PageSequenceFileName, key); return File.ReadAllTextAsync(path)!; } @@ -205,6 +261,7 @@ public Task DeleteArtefacts(string key) var path = GetPath(key, fileName); if (File.Exists(path)) File.Delete(path); } + logger.LogDebug("Deleted artefacts for '{Key}'", key); return Task.CompletedTask; } diff --git a/src/TextServices.Storage/S3TextStore.cs b/src/TextServices.Storage/S3TextStore.cs index b23de51..0b0a73d 100644 --- a/src/TextServices.Storage/S3TextStore.cs +++ b/src/TextServices.Storage/S3TextStore.cs @@ -1,5 +1,6 @@ using Amazon.S3; using Amazon.S3.Model; +using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using ProtoBuf; using TextServices.Core.Models; @@ -13,7 +14,7 @@ namespace TextServices.Storage; /// Job key segments (split on /) become S3 key path components, so the key /// "2/books/my-book" is stored under {KeyPrefix}2/books/my-book/. /// -public class S3TextStore : ITextStore, IDisposable +public class S3TextStore(IOptions options, IAmazonS3 s3, ILogger logger) : ITextStore, IDisposable { private const string TextFileName = "text.bin"; private const string AutoCompleteFileName = "autocomplete.bin"; @@ -25,18 +26,10 @@ public class S3TextStore : ITextStore, IDisposable private const string CapabilitiesFileName = "capabilities.json"; private const string PageSequenceFileName = "pagesequence.json"; - private readonly IAmazonS3 _s3; - private readonly string _bucket; - private readonly string _prefix; - - public S3TextStore(IOptions options, IAmazonS3 s3) - { - _s3 = s3; - _bucket = options.Value.BucketName; - _prefix = string.IsNullOrEmpty(options.Value.KeyPrefix) - ? string.Empty - : options.Value.KeyPrefix.TrimEnd('/') + '/'; - } + private readonly string _bucket = options.Value.BucketName; + private readonly string _prefix = string.IsNullOrEmpty(options.Value.KeyPrefix) + ? string.Empty + : options.Value.KeyPrefix.TrimEnd('/') + '/'; /// public async Task SaveText(string key, Text text) @@ -44,13 +37,15 @@ public async Task SaveText(string key, Text text) using var stream = new MemoryStream(); Serializer.Serialize(stream, text); await PutObjectAsync(GetS3Key(key, TextFileName), stream, "application/octet-stream"); + logger.LogDebug("Saved {Type} for '{Key}'", TextFileName, key); } /// public async Task LoadText(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, TextFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, TextFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", TextFileName, key); using (stream) return Serializer.Deserialize(stream); } @@ -60,13 +55,15 @@ public async Task SaveAutoComplete(string key, AutoComplete autoComplete) using var stream = new MemoryStream(); Serializer.Serialize(stream, autoComplete); await PutObjectAsync(GetS3Key(key, AutoCompleteFileName), stream, "application/octet-stream"); + logger.LogDebug("Saved {Type} for '{Key}'", AutoCompleteFileName, key); } /// public async Task LoadAutoComplete(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, AutoCompleteFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, AutoCompleteFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", AutoCompleteFileName, key); using (stream) return Serializer.Deserialize(stream); } @@ -75,13 +72,15 @@ public async Task SaveManifest(string key, string json) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(json)); await PutObjectAsync(GetS3Key(key, ManifestFileName), stream, "application/json"); + logger.LogDebug("Saved {Type} for '{Key}'", ManifestFileName, key); } /// public async Task LoadManifest(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, ManifestFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, ManifestFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", ManifestFileName, key); using var reader = new StreamReader(stream); return await reader.ReadToEndAsync(); } @@ -91,13 +90,15 @@ public async Task SaveRawText(string key, string rawText) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(rawText)); await PutObjectAsync(GetS3Key(key, RawTextFileName), stream, "text/plain"); + logger.LogDebug("Saved {Type} for '{Key}'", RawTextFileName, key); } /// public async Task LoadRawText(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, RawTextFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, RawTextFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", RawTextFileName, key); using var reader = new StreamReader(stream); return await reader.ReadToEndAsync(); } @@ -108,12 +109,14 @@ public async Task SavePdf(string key, Stream pdfStream) using var ms = new MemoryStream(); await pdfStream.CopyToAsync(ms); await PutObjectAsync(GetS3Key(key, PdfFileName), ms, "application/pdf"); + logger.LogDebug("Saved {Type} for '{Key}'", PdfFileName, key); } /// public async Task LoadPdf(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, PdfFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, PdfFileName), key); + if (stream != null) logger.LogDebug("Loaded {Type} for '{Key}'", PdfFileName, key); return stream; } @@ -122,13 +125,15 @@ public async Task SaveFigures(string key, string json) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(json)); await PutObjectAsync(GetS3Key(key, FiguresFileName), stream, "application/json"); + logger.LogDebug("Saved {Type} for '{Key}'", FiguresFileName, key); } /// public async Task LoadFigures(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, FiguresFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, FiguresFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", FiguresFileName, key); using var reader = new StreamReader(stream); return await reader.ReadToEndAsync(); } @@ -138,13 +143,15 @@ public async Task SaveAnnotations(string key, string json) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(json)); await PutObjectAsync(GetS3Key(key, AnnotationsFileName), stream, "application/json"); + logger.LogDebug("Saved {Type} for '{Key}'", AnnotationsFileName, key); } /// public async Task LoadAnnotations(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, AnnotationsFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, AnnotationsFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", AnnotationsFileName, key); using var reader = new StreamReader(stream); return await reader.ReadToEndAsync(); } @@ -154,11 +161,13 @@ public async Task Exists(string key) { try { - await _s3.GetObjectMetadataAsync(_bucket, GetS3Key(key, TextFileName)); + await s3.GetObjectMetadataAsync(_bucket, GetS3Key(key, TextFileName)); + logger.LogDebug("Artefact exists for '{Key}'", key); return true; } catch (AmazonS3Exception ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound) { + logger.LogDebug("Artefact not found for '{Key}'", key); return false; } } @@ -168,13 +177,15 @@ public async Task SaveCapabilities(string key, int services) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(services.ToString())); await PutObjectAsync(GetS3Key(key, CapabilitiesFileName), stream, "text/plain"); + logger.LogDebug("Saved {Type} for '{Key}'", CapabilitiesFileName, key); } /// public async Task LoadCapabilities(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, CapabilitiesFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, CapabilitiesFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", CapabilitiesFileName, key); using var reader = new StreamReader(stream); var text = await reader.ReadToEndAsync(); return int.TryParse(text.Trim(), out var value) ? value : null; @@ -185,13 +196,15 @@ public async Task SavePageSequence(string key, string json) { using var stream = new MemoryStream(System.Text.Encoding.UTF8.GetBytes(json)); await PutObjectAsync(GetS3Key(key, PageSequenceFileName), stream, "application/json"); + logger.LogDebug("Saved {Type} for '{Key}'", PageSequenceFileName, key); } /// public async Task LoadPageSequence(string key) { - var stream = await GetObjectStreamAsync(GetS3Key(key, PageSequenceFileName)); + var stream = await GetObjectStreamAsync(GetS3Key(key, PageSequenceFileName), key); if (stream == null) return null; + logger.LogDebug("Loaded {Type} for '{Key}'", PageSequenceFileName, key); using var reader = new StreamReader(stream); return await reader.ReadToEndAsync(); } @@ -209,16 +222,17 @@ public async Task DeleteArtefacts(string key) { try { - await _s3.DeleteObjectAsync(_bucket, GetS3Key(key, fileName)); + await s3.DeleteObjectAsync(_bucket, GetS3Key(key, fileName)); } catch (AmazonS3Exception ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound) { // Idempotent — object not found is not an error } } + logger.LogDebug("Deleted artefacts for '{Key}'", key); } - public void Dispose() => _s3.Dispose(); + public void Dispose() => s3.Dispose(); // ------------------------------------------------------------------------- // Helpers @@ -230,7 +244,7 @@ private string GetS3Key(string key, string fileName) private async Task PutObjectAsync(string s3Key, MemoryStream stream, string contentType) { stream.Position = 0; - await _s3.PutObjectAsync(new PutObjectRequest + await s3.PutObjectAsync(new PutObjectRequest { BucketName = _bucket, Key = s3Key, @@ -239,16 +253,22 @@ await _s3.PutObjectAsync(new PutObjectRequest }); } - private async Task GetObjectStreamAsync(string s3Key) + private async Task GetObjectStreamAsync(string s3Key, string key) { try { - var response = await _s3.GetObjectAsync(_bucket, s3Key); + var response = await s3.GetObjectAsync(_bucket, s3Key); return response.ResponseStream; } catch (AmazonS3Exception ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound) { + logger.LogDebug("{S3Key} not found for '{Key}'", s3Key, key); return null; } + catch (AmazonS3Exception ex) + { + logger.LogWarning(ex, "Unexpected S3 error loading '{S3Key}'", s3Key); + throw; + } } } diff --git a/src/TextServices.Tests.E2E/BuildAndSearchTests.cs b/src/TextServices.Tests.E2E/BuildAndSearchTests.cs index ef46358..b8fcc4a 100644 --- a/src/TextServices.Tests.E2E/BuildAndSearchTests.cs +++ b/src/TextServices.Tests.E2E/BuildAndSearchTests.cs @@ -37,6 +37,30 @@ public async Task PostJob_Returns202WithLocation() response.Headers.Location!.ToString().ShouldContain("e2e/lifecycle-test"); } + [Fact] + public async Task PostJob_IdStartingWithSlash_Returns400() + { + var response = await ctx.BuilderClient.PostAsJsonAsync("/textbuilder", new + { + id = "/bad-id", + sourceUri = "https://iiif.wellcomecollection.org/presentation/b2888193x" + }); + + response.StatusCode.ShouldBe(HttpStatusCode.BadRequest); + } + + [Fact] + public async Task PostJob_IdWithInternalSlashes_Returns202() + { + var response = await ctx.BuilderClient.PostAsJsonAsync("/textbuilder", new + { + id = "e2e/slash/in/id", + sourceUri = "https://iiif.wellcomecollection.org/presentation/b2888193x" + }); + + response.StatusCode.ShouldBe(HttpStatusCode.Accepted); + } + [Fact] public async Task PostJob_DuplicateId_Returns409() { @@ -376,6 +400,35 @@ public async Task ServiceFlags_SearchEnabled_AutocompleteDisabled_GatesEndpoints acResponse.StatusCode.ShouldBe(HttpStatusCode.NotFound); } + // ------------------------------------------------------------------------- + // Route safety — ID is required on all mutation / resource endpoints + // ------------------------------------------------------------------------- + + [Theory] + [InlineData("/textbuilder/", "DELETE")] + [InlineData("/textbuilder/", "PUT")] + public async Task BuilderApi_MutationWithoutId_Returns404(string path, string method) + { + var request = new HttpRequestMessage(new HttpMethod(method), path); + var response = await ctx.BuilderClient.SendAsync(request); + + response.StatusCode.ShouldBe(HttpStatusCode.NotFound); + } + + [Theory] + [InlineData("/search/v1/")] + [InlineData("/search/v2/")] + [InlineData("/autocomplete/v1/")] + [InlineData("/autocomplete/v2/")] + [InlineData("/text-augmented/v3/")] + [InlineData("/text/v1/")] + [InlineData("/annotations/manifest/v1/")] + public async Task SearchApi_ResourceEndpointWithoutId_Returns404(string path) + { + var response = await ctx.SearchClient.GetAsync(path); + response.StatusCode.ShouldBe(HttpStatusCode.NotFound); + } + // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- diff --git a/src/TextServices.Tests.E2E/Infrastructure/BuilderApiFactory.cs b/src/TextServices.Tests.E2E/Infrastructure/BuilderApiFactory.cs index 43f6473..108dfeb 100644 --- a/src/TextServices.Tests.E2E/Infrastructure/BuilderApiFactory.cs +++ b/src/TextServices.Tests.E2E/Infrastructure/BuilderApiFactory.cs @@ -4,6 +4,8 @@ using Microsoft.AspNetCore.TestHost; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Builder.Api.Data; using TextServices.Builder.Api.Services; using TextServices.Storage; @@ -33,31 +35,47 @@ public class BuilderApiFactory(string connectionString, string storageRoot, stri protected override void ConfigureWebHost(IWebHostBuilder builder) { - builder.ConfigureAppConfiguration((_, config) => - config.AddInMemoryCollection(new Dictionary + builder.ConfigureTestServices(services => { - ["ConnectionStrings:BuilderDb"] = connectionString, - ["TextServices:Storage:RootPath"] = StorageRoot, - ["RunMigrations"] = "false" - })); + // ---- Hangfire: replace PostgreSQL storage with InMemory -------------- + services.AddHangfire(config => config.UseInMemoryStorage()); - builder.ConfigureTestServices(services => - { - // ---- Hangfire: replace PostgreSQL storage with InMemory -------------- - services.AddHangfire(config => config.UseInMemoryStorage()); + // ---- Storage: use shared temp directory ----------------------------- + services.AddSingleton(_ => + new FileSystemTextStore( + new FileSystemTextStoreOptions { RootPath = StorageRoot }, + NullLogger.Instance)); - // ---- Storage: use shared temp directory ----------------------------- - services.AddSingleton(_ => - new FileSystemTextStore(new FileSystemTextStoreOptions { RootPath = StorageRoot })); + // ---- Fetchers: replace with fixture-based stubs --------------------- + services.AddScoped(_ => + new FixtureAltoFetcher(FixturesRoot)); - // ---- Fetchers: replace with fixture-based stubs --------------------- - services.AddScoped(_ => - new FixtureAltoFetcher(FixturesRoot)); + services.AddScoped(sp => + new FixtureManifestFetcher( + FixturesRoot, + sp.GetRequiredService())); + }) + .UseEnvironment("Test") + .UseDefaultServiceProvider((_, options) => + { + options.ValidateScopes = true; + }); + } - services.AddScoped(sp => - new FixtureManifestFetcher( - FixturesRoot, - sp.GetRequiredService())); + protected override IHost CreateHost(IHostBuilder builder) + { + var projectDir = Directory.GetCurrentDirectory(); + var configPath = Path.Combine(projectDir, "appsettings.Test.json"); + + builder.ConfigureHostConfiguration(config => + { + config.AddInMemoryCollection(new Dictionary + { + ["ConnectionStrings:BuilderDb"] = connectionString, + ["TextServices:Storage:FileSystem:RootPath"] = StorageRoot, + }); + config.AddJsonFile(configPath); }); + return base.CreateHost(builder); } } diff --git a/src/TextServices.Tests.E2E/Infrastructure/SearchApiFactory.cs b/src/TextServices.Tests.E2E/Infrastructure/SearchApiFactory.cs index 52c2ad3..e0878e4 100644 --- a/src/TextServices.Tests.E2E/Infrastructure/SearchApiFactory.cs +++ b/src/TextServices.Tests.E2E/Infrastructure/SearchApiFactory.cs @@ -3,6 +3,8 @@ using Microsoft.AspNetCore.TestHost; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging.Abstractions; using TextServices.Search.Api.Services; using TextServices.Storage; @@ -20,15 +22,32 @@ public class SearchApiFactory(string storageRoot) : WebApplicationFactory + builder.ConfigureTestServices(services => + services.AddSingleton(_ => + new FileSystemTextStore( + new FileSystemTextStoreOptions { RootPath = storageRoot }, + NullLogger.Instance))) + .UseEnvironment("Test") + .UseDefaultServiceProvider((_, options) => + { + options.ValidateScopes = true; + }); + } + + protected override IHost CreateHost(IHostBuilder builder) + { + var projectDir = Directory.GetCurrentDirectory(); + var configPath = Path.Combine(projectDir, "appsettings.Test.json"); + + builder.ConfigureHostConfiguration(config => + { config.AddInMemoryCollection(new Dictionary { - ["TextServices:StorageRootPath"] = storageRoot, + ["TextServices:Storage:FileSystem:RootPath"] = storageRoot, ["TextServices:BaseUrl"] = "http://localhost" - })); - - builder.ConfigureTestServices(services => - services.AddSingleton(_ => - new FileSystemTextStore(new FileSystemTextStoreOptions { RootPath = storageRoot }))); + }); + config.AddJsonFile(configPath); + }); + return base.CreateHost(builder); } } diff --git a/src/TextServices.Tests/BuilderApi/JobInstructionTests.cs b/src/TextServices.Tests/BuilderApi/JobInstructionTests.cs new file mode 100644 index 0000000..e3d1161 --- /dev/null +++ b/src/TextServices.Tests/BuilderApi/JobInstructionTests.cs @@ -0,0 +1,189 @@ +using System.ComponentModel.DataAnnotations; +using Shouldly; +using TextServices.Builder.Api.Features.Jobs; + +namespace TextServices.Tests.BuilderApi; + +public class JobInstructionTests +{ + private static List Validate(JobInstruction instruction) + { + var results = new List(); + Validator.TryValidateObject(instruction, new ValidationContext(instruction), results, validateAllProperties: true); + return results; + } + + private static PageInstruction Page(string? id = "https://example.org/canvas/1", string? type = null) => + new() { Id = id, Type = type }; + + // ------------------------------------------------------------------------- + // Id — leading slash + // ------------------------------------------------------------------------- + + [Fact] + public void Id_StartingWithSlash_FailsValidation() + { + var results = Validate(new JobInstruction + { + Id = "/bad-id", + SourceUri = "https://example.org/manifest", + }); + + results.ShouldContain(r => + r.MemberNames.Contains(nameof(JobInstruction.Id)) && + r.ErrorMessage == "Job ID must not start with '/'."); + } + + [Theory] + [InlineData("my-job")] + [InlineData("2/books/my-book")] + [InlineData("customer/collection/item")] + public void Id_NotStartingWithSlash_NoIdError(string id) + { + var results = Validate(new JobInstruction + { + Id = id, + SourceUri = "https://example.org/manifest", + }); + + results.ShouldNotContain(r => r.MemberNames.Contains(nameof(JobInstruction.Id))); + } + + // ------------------------------------------------------------------------- + // Source — neither / both / one + // ------------------------------------------------------------------------- + + [Fact] + public void NeitherSourceUriNorSourceData_FailsValidation() + { + var results = Validate(new JobInstruction { Id = "my-job" }); + + results.ShouldContain(r => + r.ErrorMessage == "Exactly one of sourceUri or sourceData must be provided."); + } + + [Fact] + public void BothSourceUriAndSourceData_FailsValidation() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceUri = "https://example.org/manifest", + SourceData = [Page()], + }); + + results.ShouldContain(r => + r.ErrorMessage == "Provide sourceUri or sourceData, not both."); + } + + [Fact] + public void OnlySourceUri_PassesSourceValidation() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceUri = "https://example.org/manifest", + }); + + results.ShouldNotContain(r => + r.MemberNames.Contains(nameof(JobInstruction.SourceUri)) || + r.MemberNames.Contains(nameof(JobInstruction.SourceData))); + } + + [Fact] + public void OnlySourceData_PassesSourceValidation() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = [Page()], + }); + + results.ShouldNotContain(r => + r.MemberNames.Contains(nameof(JobInstruction.SourceUri)) || + r.MemberNames.Contains(nameof(JobInstruction.SourceData))); + } + + [Fact] + public void EmptySourceDataList_TreatedAsNoSource() + { + // Count: 0 means hasData is false — same as omitting SourceData entirely. + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = [], + }); + + results.ShouldContain(r => + r.ErrorMessage == "Exactly one of sourceUri or sourceData must be provided."); + } + + // ------------------------------------------------------------------------- + // SourceData — per-page id requirement + // ------------------------------------------------------------------------- + + [Fact] + public void SourceData_NonPdfPageWithoutId_FailsValidation() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = [Page(id: null)], + }); + + results.ShouldContain(r => + r.MemberNames.Contains(nameof(JobInstruction.SourceData)) && + r.ErrorMessage == "Each non-pdf page in sourceData must supply an 'id' (canvas identifier URI)."); + } + + [Fact] + public void SourceData_NonPdfPageWithId_PassesValidation() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = [Page(id: "https://example.org/canvas/1")], + }); + + results.ShouldNotContain(r => + r.ErrorMessage == "Each non-pdf page in sourceData must supply an 'id' (canvas identifier URI)."); + } + + [Theory] + [InlineData("pdf")] + [InlineData("PDF")] + [InlineData("Pdf")] + public void SourceData_PdfPageWithoutId_PassesValidation(string pdfType) + { + // pdf-type pages are exempt from the id requirement. + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = [Page(id: null, type: pdfType)], + }); + + results.ShouldNotContain(r => + r.ErrorMessage == "Each non-pdf page in sourceData must supply an 'id' (canvas identifier URI)."); + } + + [Fact] + public void SourceData_MixedPages_OnlyPagesMissingIdFail() + { + var results = Validate(new JobInstruction + { + Id = "my-job", + SourceData = + [ + Page(id: "https://example.org/canvas/1"), + Page(id: null), // should fail + Page(id: null, type: "pdf"), // exempt + Page(id: "https://example.org/canvas/3"), + ], + }); + + // Exactly one error for the page missing its id. + results.Count(r => + r.ErrorMessage == "Each non-pdf page in sourceData must supply an 'id' (canvas identifier URI).") + .ShouldBe(1); + } +} diff --git a/src/TextServices.Tests/BuilderApi/TextBuildJobTests.cs b/src/TextServices.Tests/BuilderApi/TextBuildJobTests.cs index 82cd857..6463f40 100644 --- a/src/TextServices.Tests/BuilderApi/TextBuildJobTests.cs +++ b/src/TextServices.Tests/BuilderApi/TextBuildJobTests.cs @@ -36,7 +36,8 @@ public TextBuildJobTests() _db = new BuilderDbContext(options); _textStore = new FileSystemTextStore( - new FileSystemTextStoreOptions { RootPath = _tempDir }); + new FileSystemTextStoreOptions { RootPath = _tempDir }, + NullLogger.Instance); } public void Dispose() @@ -653,6 +654,7 @@ private TextBuildJob MakeJob( _textStore, jobNotifier ?? new NoOpJobNotifier(), Options.Create(new TextServicesOptions()), + NullLoggerFactory.Instance, NullLogger.Instance); } diff --git a/src/TextServices.Tests/Core/TextNormalisationEdgeCaseTests.cs b/src/TextServices.Tests/Core/TextNormalisationEdgeCaseTests.cs index 09ae8c6..9f96be4 100644 --- a/src/TextServices.Tests/Core/TextNormalisationEdgeCaseTests.cs +++ b/src/TextServices.Tests/Core/TextNormalisationEdgeCaseTests.cs @@ -154,8 +154,8 @@ public void Normalise_WhitespaceVariants_CollapsedToSingleSpace(string input, st // IMPORTANT KNOWN LIMITATION: accented letters are kept as-is (lowercased). // "café" normalises to "caf\u00e9", NOT "cafe". This means a user searching // "cafe" (no accent) will NOT find "café" in the text. This replicates the - // behaviour of both reference implementations exactly (Wellcome has a TODO - // comment noting this limitation). Do not change this behaviour silently. + // behaviour of both reference implementations exactly. + // Do not change this behaviour silently. // ------------------------------------------------------------------------- [Fact] diff --git a/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs b/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs index 1e4ecbd..ae33940 100644 --- a/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs +++ b/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs @@ -1,5 +1,7 @@ +using Microsoft.Extensions.Logging.Abstractions; using Shouldly; using TextServices.Core.Models; +using TextServices.Search.Api.Features; using TextServices.Search.Api.Features.Annotations; using TextServices.Search.Api.Features.Autocomplete; using TextServices.Search.Api.Features.Figures; @@ -30,7 +32,7 @@ public class CapabilityGatingTests [Fact] public async Task Search_WhenSearchFlagAbsent_ReturnsNull() { - var handler = new SearchHandler(new StubTextCache(JobServices.Autocomplete)); + var handler = new SearchHandler(new StubTextCache(JobServices.Autocomplete), new NullLogger()); var result = await handler.Handle( new SearchRequest(Id, "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -42,7 +44,7 @@ public async Task Search_WhenSearchFlagAbsent_ReturnsNull() public async Task Search_WhenCapabilitiesNull_ProceedsToTextLookup() { // null capabilities = all enabled; text = null → null result (text not found) - var handler = new SearchHandler(new StubTextCache(null)); + var handler = new SearchHandler(new StubTextCache(null), new NullLogger()); var result = await handler.Handle( new SearchRequest(Id, "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -159,11 +161,11 @@ public async Task WordAnnotations_WhenAnnotationsFlagAbsent_ReturnsNull() [Fact] public async Task TextAugmented_WhenTextAugmentedFlagAbsent_ReturnsNull() { - var handler = new TextAugmentedHandler( - new AllNullStore(), new StubTextCache(JobServices.Search)); + var handler = new TextAugmentedHandler(new AllNullStore(), new StubTextCache(JobServices.Search), + new NullLogger()); var result = await handler.Handle( - new TextAugmentedRequest(Id, SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest(Id, new ResolvedRequest(Id, SelfUrl, SelfUrl, SearchBase)), CancellationToken.None); result.ShouldBeNull(); } diff --git a/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs b/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs index 589b0ec..d616100 100644 --- a/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs +++ b/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs @@ -1,4 +1,5 @@ using System.Xml.Linq; +using Microsoft.Extensions.Logging.Abstractions; using Shouldly; using TextServices.Core.Models; using TextServices.Core.Providers; @@ -24,7 +25,7 @@ public class SearchHandlerTests public async Task Handle_EmptyQuery_ReturnsEmptyResponse() { var text = BuildText([("https://example.org/c/1", 1000, 1500, "hello world")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "", SelfUrl, SelfUrl), CancellationToken.None); @@ -39,7 +40,7 @@ public async Task Handle_EmptyQuery_ReturnsEmptyResponse() public async Task Handle_QueryNotFound_ReturnsEmptyResponse() { var text = BuildText([("https://example.org/c/1", 1000, 1500, "hello world")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "parliament", SelfUrl, SelfUrl), CancellationToken.None); @@ -52,7 +53,7 @@ public async Task Handle_QueryNotFound_ReturnsEmptyResponse() [Fact] public async Task Handle_TextNotFound_ReturnsNull() { - var handler = new SearchHandler(new StubTextCache(null)); + var handler = new SearchHandler(new StubTextCache(null), new NullLogger()); var result = await handler.Handle( new SearchRequest("missing/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -69,7 +70,7 @@ public async Task Handle_SingleHit_CorrectAnnotationShape() { var canvasId = "https://example.org/c/1"; var text = BuildText([(canvasId, 1000, 1500, "the quick brown fox")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); @@ -91,7 +92,7 @@ public async Task Handle_SingleHit_CorrectAnnotationShape() public async Task Handle_SingleHit_CorrectHitShape() { var text = BuildText([("https://example.org/c/1", 1000, 1500, "the quick brown fox")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); @@ -112,7 +113,7 @@ public async Task Handle_SingleHit_CorrectHitShape() public async Task Handle_ResponseHasCorrectId() { var text = BuildText([("https://example.org/c/1", 1000, 1500, "hello world")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -127,7 +128,7 @@ public async Task Handle_MultipleHits_CorrectCount() { // "the" appears twice in this text var text = BuildText([("https://example.org/c/1", 1000, 1500, "the quick brown the fox")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "the", SelfUrl, SelfUrl), CancellationToken.None); @@ -142,7 +143,7 @@ public async Task Handle_WithQueryInSelfUrl_AnnotationIdDoesNotContainQueryStrin { var selfUrlWithQuery = $"{SelfUrl}?q=quick"; var text = BuildText([("https://example.org/c/1", 1000, 1500, "the quick brown fox")]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); @@ -163,7 +164,7 @@ public async Task Handle_MultiPage_AnnotationsReferenceCorrectCanvas() (canvas1, 1000, 1500, "hello world"), (canvas2, 1000, 1500, "parliament square"), ]); - var handler = new SearchHandler(new StubTextCache(text)); + var handler = new SearchHandler(new StubTextCache(text), new NullLogger()); var result = await handler.Handle( new SearchRequest("test/book", "parliament", SelfUrl, SelfUrl), CancellationToken.None); diff --git a/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs b/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs index 558c993..d1562fe 100644 --- a/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs +++ b/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs @@ -1,4 +1,5 @@ using System.Xml.Linq; +using Microsoft.Extensions.Logging.Abstractions; using Shouldly; using TextServices.Core.Models; using TextServices.Core.Providers; @@ -23,7 +24,7 @@ public class SearchV2HandlerTests [Fact] public async Task Handle_TextNotFound_ReturnsNull() { - var handler = new SearchV2Handler(new StubTextCache(null)); + var handler = GetSearchV2Handler(null); var result = await handler.Handle( new SearchV2Request("missing/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -35,7 +36,7 @@ public async Task Handle_TextNotFound_ReturnsNull() public async Task Handle_EmptyQuery_ReturnsEmptyResponse() { var text = BuildSpatialText([("https://example.org/c/1", 1000, 1500, "hello world")]); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "", SelfUrl, SelfUrl), CancellationToken.None); @@ -53,7 +54,7 @@ public async Task Handle_TemporalHit_MotivationIsSupplementing() { var canvasId = "https://example.org/c/audio"; var text = BuildTemporalText(canvasId, "00:00:05.000 --> 00:00:08.500\nHello world\n"); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -68,7 +69,7 @@ public async Task Handle_TemporalHit_TargetUsesTemporalFragment() { var canvasId = "https://example.org/c/audio"; var text = BuildTemporalText(canvasId, "00:00:05.000 --> 00:00:08.500\nHello world\n"); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -84,7 +85,7 @@ public async Task Handle_TemporalHit_FragmentUsesDecimalPoint() { var canvasId = "https://example.org/c/audio"; var text = BuildTemporalText(canvasId, "00:00:05.500 --> 00:00:08.750\nHello world\n"); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -103,7 +104,7 @@ public async Task Handle_TemporalHit_TimesAreInSeconds() var canvasId = "https://example.org/c/audio"; // StartMs = 5000 → 5 seconds var text = BuildTemporalText(canvasId, "00:00:05.000 --> 00:00:08.500\nHello world\n"); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -122,7 +123,7 @@ public async Task Handle_SpatialHit_MotivationIsPainting() { var canvasId = "https://example.org/c/1"; var text = BuildSpatialText([(canvasId, 1000, 1500, "the quick brown fox")]); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); @@ -140,7 +141,7 @@ public async Task Handle_WithQueryInSelfUrl_AnnotationIdDoesNotContainQueryStrin var selfUrlWithQuery = $"{SelfUrl}?q=quick"; var canvasId = "https://example.org/c/1"; var text = BuildSpatialText([(canvasId, 1000, 1500, "the quick brown fox")]); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); @@ -158,7 +159,7 @@ public async Task Handle_WithQueryInSelfUrl_ContextualizingAnnotationIdDoesNotCo var selfUrlWithQuery = $"{SelfUrl}?q=quick"; var canvasId = "https://example.org/c/1"; var text = BuildSpatialText([(canvasId, 1000, 1500, "the quick brown fox")]); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); @@ -179,7 +180,7 @@ public async Task Handle_TemporalHit_ContextualizingAnnotationPresent() { var canvasId = "https://example.org/c/audio"; var text = BuildTemporalText(canvasId, "00:00:05.000 --> 00:00:08.500\nHello world\n"); - var handler = new SearchV2Handler(new StubTextCache(text)); + var handler = GetSearchV2Handler(text); var result = await handler.Handle( new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); @@ -193,6 +194,10 @@ public async Task Handle_TemporalHit_ContextualizingAnnotationPresent() // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- + private static SearchV2Handler GetSearchV2Handler(Text? text) + { + return new SearchV2Handler(new StubTextCache(text), new NullLogger()); + } /// /// Builds a temporal from a canvas id and raw VTT cue block diff --git a/src/TextServices.Tests/SearchApi/TextAugmentedHandlerTests.cs b/src/TextServices.Tests/SearchApi/TextAugmentedHandlerTests.cs index 01ca794..35f3e99 100644 --- a/src/TextServices.Tests/SearchApi/TextAugmentedHandlerTests.cs +++ b/src/TextServices.Tests/SearchApi/TextAugmentedHandlerTests.cs @@ -1,7 +1,9 @@ using System.Text.Json.Nodes; +using Microsoft.Extensions.Logging.Abstractions; using Shouldly; using TextServices.Core.Models; using TextServices.Core.Providers; +using TextServices.Search.Api.Features; using TextServices.Search.Api.Features.TextAugmented; using TextServices.Search.Api.Services; using TextServices.Storage; @@ -29,11 +31,24 @@ public async Task Handle_ManifestNotFound_ReturnsNull() var handler = MakeHandler(null); var result = await handler.Handle( - new TextAugmentedRequest("missing/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("missing/book", DefaultResolved("missing/book")), CancellationToken.None); result.ShouldBeNull(); } + [Fact] + public async Task Handle_NoText_ReturnsManifestWithoutSearchServicesOrContext() + { + var handler = MakeHandler(V3Manifest(), noText: true); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + result.ShouldNotBeNull(); + result["service"].ShouldBeNull(); + result["@context"]!.GetValue().ShouldBe("http://iiif.io/api/presentation/3/context.json"); + } + // ------------------------------------------------------------------------- // @id replacement // ------------------------------------------------------------------------- @@ -44,7 +59,7 @@ public async Task Handle_V3Manifest_ReplacesIdWithSelfUrl() var handler = MakeHandler(V3Manifest("https://original.example.org/manifest/1")); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); result.ShouldNotBeNull(); result!["id"]!.GetValue().ShouldBe(SelfUrl); @@ -56,7 +71,7 @@ public async Task Handle_V2Manifest_ReplacesAtIdWithSelfUrl() var handler = MakeHandler(V2Manifest("https://original.example.org/manifest/1")); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); result.ShouldNotBeNull(); result!["@id"]!.GetValue().ShouldBe(SelfUrl); @@ -72,7 +87,7 @@ public async Task Handle_NoExistingService_CreatesTwoServiceEntries() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var service = result!["service"].ShouldBeOfType(); service.Count.ShouldBe(2); // v2 + v1 @@ -84,7 +99,7 @@ public async Task Handle_ExistingServiceArray_PrependsSearchServices() var handler = MakeHandler(V3ManifestWithServiceArray()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var service = result!["service"].ShouldBeOfType(); service.Count.ShouldBe(3); // v2 + v1 + original @@ -96,7 +111,7 @@ public async Task Handle_ExistingServiceObject_PromotesToArrayWithSearchFirst() var handler = MakeHandler(V3ManifestWithServiceObject()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var service = result!["service"].ShouldBeOfType(); service.Count.ShouldBe(3); // v2 + v1 + original @@ -109,7 +124,7 @@ public async Task Handle_ExistingServiceArray_SearchServicesAreFirst() var handler = MakeHandler(V3ManifestWithServiceArray()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var service = result!["service"].ShouldBeOfType(); service[0]!["id"]!.GetValue().ShouldBe(ExpectedSearchV2); @@ -123,7 +138,7 @@ public async Task Handle_SearchServiceV2_HasCorrectTypeAndId() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var searchServiceV2 = result!["service"]![0]!; searchServiceV2["id"]!.GetValue().ShouldBe(ExpectedSearchV2); @@ -136,7 +151,7 @@ public async Task Handle_SearchServiceV1_HasCorrectTypeProfileAndId() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var searchServiceV1 = result!["service"]![1]!; searchServiceV1["id"]!.GetValue().ShouldBe(ExpectedSearchV1); @@ -150,7 +165,7 @@ public async Task Handle_AutocompleteServiceV2_NestedInsideSearchServiceV2() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var autocomplete = result!["service"]![0]!["service"]![0]!; autocomplete["id"]!.GetValue().ShouldBe(ExpectedAutocompleteV2); @@ -163,7 +178,7 @@ public async Task Handle_AutocompleteServiceV1_NestedInsideSearchServiceV1() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var autocomplete = result!["service"]![1]!["service"]![0]!; autocomplete["id"]!.GetValue().ShouldBe(ExpectedAutocompleteV1); @@ -181,7 +196,7 @@ public async Task Handle_ImageBasedContent_AddsPdfAndPlainTextRenderingLinks() var handler = MakeHandler(V3Manifest(), cachedText: SpatialText()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var rendering = result!["rendering"].ShouldBeOfType(); rendering.Count.ShouldBe(2); @@ -197,7 +212,7 @@ public async Task Handle_TemporalOnlyContent_AddsPlainTextButNoPdfLink() var handler = MakeHandler(V3Manifest(), cachedText: TemporalText()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var rendering = result!["rendering"].ShouldBeOfType(); rendering.Count.ShouldBe(1); @@ -208,10 +223,10 @@ public async Task Handle_TemporalOnlyContent_AddsPlainTextButNoPdfLink() [Fact] public async Task Handle_TextNotBuilt_NoRenderingLinks() { - var handler = MakeHandler(V3Manifest(), cachedText: null); + var handler = MakeHandler(V3Manifest(), noText: true); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); result!["rendering"].ShouldBeNull(); } @@ -222,7 +237,7 @@ public async Task Handle_ImageBasedContent_RenderingLinksArePrependedToExisting( var handler = MakeHandler(V3ManifestWithRendering(), cachedText: SpatialText()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var rendering = result!["rendering"].ShouldBeOfType(); rendering.Count.ShouldBe(3); // PDF + plain text + original @@ -236,7 +251,7 @@ public async Task Handle_TemporalOnlyContent_PlainTextLinkPrependedToExisting() var handler = MakeHandler(V3ManifestWithRendering(), cachedText: TemporalText()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var rendering = result!["rendering"].ShouldBeOfType(); rendering.Count.ShouldBe(2); // plain text + original (no PDF) @@ -258,7 +273,7 @@ public async Task Handle_WithAnnotations_InjectsManifestAnnotationsRef() var handler = MakeHandler(V3Manifest(), annotationsJson: annotationsJson); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var annotations = result!["annotations"].ShouldBeOfType(); var ref0 = annotations[0]!; @@ -277,7 +292,7 @@ public async Task Handle_WithAnnotationsAndFigures_AnnotationsRefBeforeFiguresRe var handler = MakeHandler(V3Manifest(), annotationsJson: annotationsJson, figuresJson: figuresJson); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); var annotations = result!["annotations"].ShouldBeOfType(); annotations.Count.ShouldBe(2); @@ -291,17 +306,210 @@ public async Task Handle_WithoutAnnotations_NoManifestAnnotationsRef() var handler = MakeHandler(V3Manifest()); var result = await handler.Handle( - new TextAugmentedRequest("test/book", SelfUrl, SearchBase), CancellationToken.None); + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); result!["annotations"].ShouldBeNull(); } + // ------------------------------------------------------------------------- + // Service deduplication + // ------------------------------------------------------------------------- + + [Fact] + public async Task Handle_ServiceArrayAlreadyContainsSearchV2_DoesNotDuplicateV2() + { + var handler = MakeHandler(V3ManifestWithExistingSearchV2()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var service = result!["service"].ShouldBeOfType(); + service.Count.ShouldBe(2); // original v2 stays; v1 added; no duplicate v2 + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV2).ShouldBe(1); + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV1).ShouldBe(1); + } + + [Fact] + public async Task Handle_ServiceArrayAlreadyContainsBothSearchServices_NeitherDuplicated() + { + var handler = MakeHandler(V3ManifestWithBothSearchServices()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var service = result!["service"].ShouldBeOfType(); + service.Count.ShouldBe(2); // unchanged — both already present + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV2).ShouldBe(1); + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV1).ShouldBe(1); + } + + [Fact] + public async Task Handle_ServiceObjectMatchesSearchV2_PromotesToArrayWithoutDuplicatingV2() + { + var handler = MakeHandler(V3ManifestWithSearchV2ServiceObject()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var service = result!["service"].ShouldBeOfType(); + service.Count.ShouldBe(2); // v1 added; original v2 kept; no duplicate v2 + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV2).ShouldBe(1); + service.Count(n => n?["id"]?.GetValue() == ExpectedSearchV1).ShouldBe(1); + } + + // ------------------------------------------------------------------------- + // Rendering deduplication + // ------------------------------------------------------------------------- + + [Fact] + public async Task Handle_RenderingArrayAlreadyContainsTextRef_DoesNotDuplicateTextRef() + { + var handler = MakeHandler(V3ManifestWithExistingTextRef(), cachedText: SpatialText()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var rendering = result!["rendering"].ShouldBeOfType(); + rendering.Count(n => n?["id"]?.GetValue() == ExpectedRawTextUrl).ShouldBe(1); + } + + [Fact] + public async Task Handle_RenderingArrayAlreadyContainsPdfRef_DoesNotDuplicatePdfRef() + { + var handler = MakeHandler(V3ManifestWithExistingPdfRef(), cachedText: SpatialText()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var rendering = result!["rendering"].ShouldBeOfType(); + rendering.Count(n => n?["id"]?.GetValue() == ExpectedPdfUrl).ShouldBe(1); + } + + [Fact] + public async Task Handle_RenderingArrayAlreadyContainsBothRefs_NeitherDuplicated() + { + var handler = MakeHandler(V3ManifestWithExistingPdfAndTextRefs(), cachedText: SpatialText()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var rendering = result!["rendering"].ShouldBeOfType(); + rendering.Count.ShouldBe(2); // unchanged + rendering.Count(n => n?["id"]?.GetValue() == ExpectedPdfUrl).ShouldBe(1); + rendering.Count(n => n?["id"]?.GetValue() == ExpectedRawTextUrl).ShouldBe(1); + } + + // ------------------------------------------------------------------------- + // Annotations deduplication + // ------------------------------------------------------------------------- + + [Fact] + public async Task Handle_AnnotationsArrayAlreadyContainsAnnotationsRef_DoesNotDuplicate() + { + const string annotationsJson = """{"type":"AnnotationPage","items":[]}"""; + var handler = MakeHandler(V3ManifestWithExistingAnnotationsRef(), annotationsJson: annotationsJson); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var annotations = result!["annotations"].ShouldBeOfType(); + annotations.Count(n => n?["id"]?.GetValue() == + "https://search.example.org/annotations/manifest/v1/test/book").ShouldBe(1); + } + + [Fact] + public async Task Handle_AnnotationsArrayAlreadyContainsFiguresRef_DoesNotDuplicate() + { + const string figuresJson = """{"type":"AnnotationPage","items":[]}"""; + var handler = MakeHandler(V3ManifestWithExistingFiguresRef(), figuresJson: figuresJson); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var annotations = result!["annotations"].ShouldBeOfType(); + annotations.Count(n => n?["id"]?.GetValue() == + "https://search.example.org/identified/figures/test/book").ShouldBe(1); + } + + // ------------------------------------------------------------------------- + // @context injection + // ------------------------------------------------------------------------- + + [Fact] + public async Task Handle_NoContext_AddsSearchContexts() + { + var handler = MakeHandler("""{"id":"https://example.org/m/1","type":"Manifest"}"""); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var ctx = result!["@context"].ShouldBeOfType(); + ctx.Select(n => n!.GetValue()).ShouldBe( + [ + "http://iiif.io/api/search/2/context.json", + "http://iiif.io/api/search/1/context.json", + ]); + } + + [Fact] + public async Task Handle_StringContext_PromotesToArrayAndAppendsSearchContexts() + { + var handler = MakeHandler(V3Manifest()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var ctx = result!["@context"].ShouldBeOfType(); + ctx.Select(n => n!.GetValue()).ShouldBe( + [ + "http://iiif.io/api/presentation/3/context.json", + "http://iiif.io/api/search/2/context.json", + "http://iiif.io/api/search/1/context.json", + ]); + } + + [Fact] + public async Task Handle_ArrayContext_AppendsSearchContexts() + { + var handler = MakeHandler("""{"id":"https://example.org/m/1","type":"Manifest","@context":["http://iiif.io/api/presentation/3/context.json"]}"""); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var ctx = result!["@context"].ShouldBeOfType(); + ctx.Select(n => n!.GetValue()).ShouldBe( + [ + "http://iiif.io/api/presentation/3/context.json", + "http://iiif.io/api/search/2/context.json", + "http://iiif.io/api/search/1/context.json", + ]); + } + + [Fact] + public async Task Handle_ContextAlreadyContainsSearchUrls_DoesNotDuplicate() + { + var handler = MakeHandler(V3ManifestWithSearchContexts()); + + var result = await handler.Handle( + new TextAugmentedRequest("test/book", DefaultResolved()), CancellationToken.None); + + var ctx = result!["@context"].ShouldBeOfType(); + ctx.Count(n => n!.GetValue() == "http://iiif.io/api/search/2/context.json").ShouldBe(1); + ctx.Count(n => n!.GetValue() == "http://iiif.io/api/search/1/context.json").ShouldBe(1); + } + + private static ResolvedRequest DefaultResolved(string id = "test/book") + => new(id, SelfUrl, SelfUrl, SearchBase); + private static TextAugmentedHandler MakeHandler( string? manifestJson, string? annotationsJson = null, string? figuresJson = null, - Text? cachedText = null) - => new(new StubTextStore(manifestJson, annotationsJson, figuresJson), new StubTextCache(cachedText)); + Text? cachedText = null, + bool noText = false) + => new(new StubTextStore(manifestJson, annotationsJson, figuresJson), + new StubTextCache(noText ? null : (cachedText ?? SpatialText())), + new NullLogger()); /// A Text with one spatial (image-based) canvas. private static Text SpatialText() @@ -336,6 +544,33 @@ private static string V3ManifestWithServiceObject() private static string V3ManifestWithRendering() => """{"id":"https://example.org/m/1","type":"Manifest","rendering":[{"id":"https://example.org/pdf","type":"Text","format":"application/pdf"}]}"""; + private static string V3ManifestWithExistingSearchV2() + => """{"id":"https://example.org/m/1","type":"Manifest","service":[{"id":"https://search.example.org/search/v2/test/book","type":"SearchService2"}]}"""; + + private static string V3ManifestWithBothSearchServices() + => """{"id":"https://example.org/m/1","type":"Manifest","service":[{"id":"https://search.example.org/search/v2/test/book","type":"SearchService2"},{"id":"https://search.example.org/search/v1/test/book","type":"SearchService1"}]}"""; + + private static string V3ManifestWithSearchV2ServiceObject() + => """{"id":"https://example.org/m/1","type":"Manifest","service":{"id":"https://search.example.org/search/v2/test/book","type":"SearchService2"}}"""; + + private static string V3ManifestWithExistingTextRef() + => """{"id":"https://example.org/m/1","type":"Manifest","rendering":[{"id":"https://search.example.org/text/v1/test/book","type":"Text","format":"text/plain"}]}"""; + + private static string V3ManifestWithExistingPdfRef() + => """{"id":"https://example.org/m/1","type":"Manifest","rendering":[{"id":"https://search.example.org/pdf/v1/test/book","type":"Text","format":"application/pdf"}]}"""; + + private static string V3ManifestWithExistingPdfAndTextRefs() + => """{"id":"https://example.org/m/1","type":"Manifest","rendering":[{"id":"https://search.example.org/pdf/v1/test/book","type":"Text","format":"application/pdf"},{"id":"https://search.example.org/text/v1/test/book","type":"Text","format":"text/plain"}]}"""; + + private static string V3ManifestWithExistingAnnotationsRef() + => """{"id":"https://example.org/m/1","type":"Manifest","annotations":[{"id":"https://search.example.org/annotations/manifest/v1/test/book","type":"AnnotationPage"}]}"""; + + private static string V3ManifestWithExistingFiguresRef() + => """{"id":"https://example.org/m/1","type":"Manifest","annotations":[{"id":"https://search.example.org/identified/figures/test/book","type":"AnnotationPage"}]}"""; + + private static string V3ManifestWithSearchContexts() + => """{"id":"https://example.org/m/1","type":"Manifest","@context":["http://iiif.io/api/presentation/3/context.json","http://iiif.io/api/search/2/context.json","http://iiif.io/api/search/1/context.json"]}"""; + private sealed class StubTextStore( string? manifestJson, string? annotationsJson = null, diff --git a/src/TextServices.Tests/Storage/FileSystemTextStoreTests.cs b/src/TextServices.Tests/Storage/FileSystemTextStoreTests.cs index 228fbf5..737b347 100644 --- a/src/TextServices.Tests/Storage/FileSystemTextStoreTests.cs +++ b/src/TextServices.Tests/Storage/FileSystemTextStoreTests.cs @@ -1,3 +1,4 @@ +using Microsoft.Extensions.Logging.Abstractions; using Shouldly; using TextServices.Core.Models; using TextServices.Storage; @@ -16,7 +17,7 @@ public sealed class FileSystemTextStoreTests : IDisposable public FileSystemTextStoreTests() { _tempDir = Path.Combine(Path.GetTempPath(), $"TextServicesTests_{Guid.NewGuid():N}"); - _store = new FileSystemTextStore(new FileSystemTextStoreOptions { RootPath = _tempDir }); + _store = new FileSystemTextStore(new FileSystemTextStoreOptions { RootPath = _tempDir }, NullLogger.Instance); } public void Dispose() diff --git a/src/TextServices.Tests/Storage/S3TextStoreTests.cs b/src/TextServices.Tests/Storage/S3TextStoreTests.cs index 587ff7f..1b05a3d 100644 --- a/src/TextServices.Tests/Storage/S3TextStoreTests.cs +++ b/src/TextServices.Tests/Storage/S3TextStoreTests.cs @@ -1,6 +1,7 @@ using System.Net; using Amazon.S3; using Amazon.S3.Model; +using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; using Shouldly; using TextServices.Core.Models; @@ -180,7 +181,8 @@ public async Task NoKeyPrefix_KeyStartsWithJobId() // ------------------------------------------------------------------------- private static S3TextStore MakeStore(FakeS3 fake, string prefix = "") - => new(Options.Create(new S3TextStoreOptions { BucketName = Bucket, KeyPrefix = prefix }), fake); + => new(Options.Create(new S3TextStoreOptions { BucketName = Bucket, KeyPrefix = prefix }), fake, + NullLogger.Instance); private static Text BuildText(string words) { diff --git a/src/TextServices.sln b/src/TextServices.sln index ffcec20..4c40bfe 100644 --- a/src/TextServices.sln +++ b/src/TextServices.sln @@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextServices.Pdf", "TextSer EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextServices.Infrastructure", "TextServices.Infrastructure\TextServices.Infrastructure.csproj", "{209A6FAD-0476-42F5-92CC-624C677D661A}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Testing", "Testing", "{680666A5-6A80-48E3-AEC8-158909FF5FE4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -143,4 +145,8 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {AA561B8F-C37B-41A5-808B-489CA2513BB6} = {680666A5-6A80-48E3-AEC8-158909FF5FE4} + {C2A377B6-A433-4BBF-BA0C-A8E8F8A9D6FB} = {680666A5-6A80-48E3-AEC8-158909FF5FE4} + EndGlobalSection EndGlobal