diff --git a/Dockerfile.Builder b/Dockerfile.Builder index 2766e9b..ac464a0 100644 --- a/Dockerfile.Builder +++ b/Dockerfile.Builder @@ -14,6 +14,8 @@ RUN dotnet publish TextServices.Builder.Api/TextServices.Builder.Api.csproj \ -c Release -o /app/publish --no-restore FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libgssapi-krb5-2 libkrb5-3 krb5-user && rm -rf /var/lib/apt/lists/* WORKDIR /app COPY --from=build /app/publish . EXPOSE 8080 diff --git a/docs/search-api.md b/docs/search-api.md index 43ffe21..cbbeb94 100644 --- a/docs/search-api.md +++ b/docs/search-api.md @@ -517,5 +517,37 @@ Search API configuration lives under the `TextServices` key in `appsettings.json | `StorageRootPath` | `textservices-data` | Root directory of the text artefact store. Must point to the same location as the Builder API's `Storage:RootPath`. | | `PdfTriggerQueueCapacity` | `50` | Maximum number of PDF trigger requests that can be queued for background generation. Requests beyond this limit receive `503 Service Unavailable`. | | `PdfTriggerMaxConcurrency` | `2` | Maximum number of PDFs generated concurrently by the background trigger queue. Each in-flight generation buffers the full PDF in memory — keep this low on memory-constrained hosts. | +| `AllowFileImageProxy` | `false` | When `true`, the `/proxy/image` endpoint streams local `file://` images. Only enable in trusted local-dev environments where those files are not access-controlled. | +| `AllowedCustomHosts` | `[]` | Hostnames accepted from the `X-Forwarded-Host` request header (e.g. custom CloudFront distributions). See [Forwarded-header URL rewriting](#forwarded-header-url-rewriting) below. | + +--- + +## Forwarded-header URL rewriting + +When the Search API sits behind a reverse proxy that rewrites the public URL (e.g. a CloudFront +distribution with a custom domain), the `id` values in IIIF responses must reflect the +public-facing URL rather than the internal one. 
+ +Configure `AllowedCustomHosts` with the public hostnames you trust: + +```json +{ + "TextServices": { + "AllowedCustomHosts": ["custom.example.org"] + } +} +``` + +When a request arrives carrying `X-Forwarded-Host: custom.example.org` and that value matches +an entry in `AllowedCustomHosts`: + +- The host in all generated IIIF URLs is replaced with the forwarded host. +- If `X-Forwarded-Path` is also present, the Search API extracts the effective job ID from it + (stripping the route prefix), so the `id` values in the response reflect the public path + rather than the internal route. The forwarded path must begin with the endpoint's own route prefix (otherwise it is ignored), + e.g. a proxy that exposes `/search/v2/my-book` for the internal `/search/v2/2/cc/my-book`. + +Hosts not in `AllowedCustomHosts` are always ignored, regardless of what headers the request +carries. The default empty array means `X-Forwarded-Host` is never honoured. All responses include `Access-Control-Allow-Origin: *`. The Search API is entirely read-only, so open CORS is required by the IIIF specification and safe without restriction. diff --git a/instructions/alternative-paths.md b/instructions/alternative-paths.md new file mode 100644 index 0000000..4e16ea8 --- /dev/null +++ b/instructions/alternative-paths.md @@ -0,0 +1,106 @@ +## Path Rewrites + +The paths generated for the Search API are: + +* `/search/v2/{**id}?q={term}` +* `/search/v1/{**id}?q={term}` +* `/autocomplete/v2/{**id}?q={term}` +* `/autocomplete/v1/{**id}?q={term}` +* `/annotations/lines/v1/{n}/{**id}` +* `/annotations/words/v1/{n}/{**id}` +* `/text-augmented/v3/{**id}` +* `/proxy/image?uri={uri}` +* `/text/v1/{**id}` +* `/pdf/v1/{**id}` +* `/identified/figures/{**id}` + +These are rendered onto generated Manifest using `{protocol}://{host}/{above-path}` + +### Canonical Paths + +This is the "as is" processing. + +Currently we have a `SearchBaseUrl` (say `https://search.default`). 
When generating a Manifest this is used to construct every `id`, so the paths listed above are appended to `SearchBaseUrl`. + +The important thing is that `{**id}` is _always_ the job-id; it can contain any number of slashes and is replaced in its entirety. + +### Requirement + +We need to be able to have some degree of control over what paths are rendered when returning. To do so we will support `X-Forwarded-Host` and `X-Forwarded-Path`. + +We need a way to translate incoming requests so that they are reflected in the generated response - without adding any sort of rule-based _stuff_. + +### Solution + +One solution to this is `X-Forwarded-Proto`, `X-Forwarded-Host` (standard HTTP headers) and `X-Forwarded-Path` (non-standard). These will all be added by the proxy (e.g. CloudFront), if there are rewrite rules in place. + +* `X-Forwarded-Proto` - configured via standard middleware. Ensures that the HttpContext has the appropriate protocol. +* `X-Forwarded-Host` - will be used for the host if it is part of the known whitelist. Added by proxy. +* `X-Forwarded-Path` - will be used if it is accompanied by a whitelisted `X-Forwarded-Host`. Added by proxy. + * This isn't perfect but adds a degree of safety. You can only rewrite path + host if we expect the host. + * If we want to rewrite a path for the canonical host it would need to be whitelisted, which feels like a safe trade-off. + +### Examples + +The examples below work through the requirements. Assume we're requesting the text-augmented adjunct; they show the resulting value for the `/autocomplete/v1` path. 
For all of these examples: +* Canonical hostname is `search.default` +* JobId is `2/cc/123` +* The actual http request that hits the search API is `https://search.default/text-augmented/v3/2/cc/123` + +| Incoming (maybe via proxy) | X-Forwarded-Host | X-Forwarded-Path | Autocomplete `id` | Notes | +| ------------------------------------------------- | ---------------- | -------------------------- | ----------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| https://search.default/text-augmented/v3/2/cc/123 | | | https://search.default/autocomplete/v1/2/cc/123 | Default, no proxy | +| https://unknown.host/text-augmented/v3/2/cc/123 | unknown.host | | https://search.default/autocomplete/v1/2/cc/123 | x-forwarded-host but unknown | +| https://unknown.host/text-augmented/v3/2/cc/123 | | text-augmented/v3/2/cc/123 | https://search.default/autocomplete/v1/2/cc/123 | x-forwarded-path but no accompanying x-forwarded-host | +| https://unknown.host/text-augmented/v3/2/cc/123 | unknown.host | text-augmented/v3/2/cc/123 | https://search.default/autocomplete/v1/2/cc/123 | x-forwarded-path but accompanying x-forwarded-host is unknown | +| https://known.host/text-augmented/v3/2/cc/123 | known.host | | https://known.host/autocomplete/v1/2/cc/123 | x-forwarded-host is whitelisted | +| https://known.host/text-augmented/v3/cc/123 | known.host | text-augmented/v3/cc/123 | https://known.host/autocomplete/v1/cc/123 | x-forwarded-host is whitelisted and x-forwarded-path is set (crucially it is NOT the `id`) | +| https://known.host/text-augmented/v3/cc/123 | known.host | | https://known.host/autocomplete/v1/2/cc/123 | x-forwarded-host is whitelisted. x-forwarded-path not set so `id` is used. 
This would be a misconfigured proxy | + +> [!NOTE] +> Some points to note from the above: +> * The above outlines how the `id` path is constructed for the autocomplete path on a generated Manifest, but the same process applies to any generated `id`. +> * The `X-Forwarded-Path` may contain a query parameter (e.g. for search results); this should be removed from ids. + +#### Implementation + +The rough implementation would be to use the `X-Forwarded-Path` to determine the `{**id}` element to use in generated paths. + +To do so (assuming `X-Forwarded-Path` is provided and valid) we will remove the current root (minus `{**id}`) from the start of the `X-Forwarded-Path`; this will yield the usable `id` for path generation. + +### Implementation + +All forwarded-header logic is centralised in `EndpointHelpers.Resolve` (`Features/EndpointHelpers.cs`), which reads `X-Forwarded-Host` and `X-Forwarded-Path` once and returns a `ResolvedRequest` record: + +```csharp +internal record ResolvedRequest(string EffectiveId, string SelfUrl, string ResourceUrl, string BaseUrl); +``` + +* `EffectiveId` — the job id to use in generated URLs (extracted from `X-Forwarded-Path` when the host is whitelisted; otherwise the original route id). +* `SelfUrl` — absolute URL for the current endpoint, already incorporating the effective id and optional query term. `ResourceUrl` is the same URL without the query string, used as the base for child resource ids. +* `BaseUrl` — scheme + authority only; used by `TextAugmentedEndpoints` as the base for all cross-endpoint service URLs. + +Every endpoint calls `Resolve` once: + +```csharp +var resolved = EndpointHelpers.Resolve(options.Value, ctx, "search/v1/", id, q); +// resolved.SelfUrl → passed to the handler as the response @id +// resolved.BaseUrl → used by TextAugmented to build service descriptor URLs +// resolved.EffectiveId → passed to TextAugmentedRequest as UrlId (see below) +``` + +`X-Forwarded-Proto` is handled separately by `ForwardedHeadersMiddleware` (configured in `ServiceCollectionExtensions.ConfigureForwardedHeaders`), which sets `Request.Scheme`. 
Trusted sources are restricted via `KnownNetworks` / `KnownProxies` config keys. + +#### TextAugmented specifics + +`TextAugmentedHandler` builds cross-endpoint URLs using both a storage id (to load artefacts) and a URL id (to generate service descriptors). These differ when `X-Forwarded-Path` rewrites the id. `TextAugmentedRequest` carries both: + +```csharp +record TextAugmentedRequest(string Id, string SelfUrl, string SearchBaseUrl, string? UrlId = null) +``` + +The handler uses `UrlId ?? Id` for URL generation and `Id` for all storage lookups. The endpoint passes `resolved.EffectiveId` as `UrlId`. + +#### Allowlist configuration + +Permitted custom hosts are configured under `TextServices:AllowedCustomHosts` in `appsettings.json`. An empty array (the default) means both `X-Forwarded-Host` and `X-Forwarded-Path` are always ignored. \ No newline at end of file diff --git a/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs b/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs index de14da1..1291721 100644 --- a/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs +++ b/src/TextServices.Search.Api/Configuration/SearchApiOptions.cs @@ -57,4 +57,12 @@ public class SearchApiOptions /// /// public bool AllowFileImageProxy { get; set; } = false; + + /// + /// Hostnames accepted from the X-Forwarded-Host request header (e.g. custom CloudFront distributions). + /// When a request carries X-Forwarded-Host and its value matches an entry here, that host + /// replaces the canonical host in generated IIIF URLs. An empty array (the default) means + /// X-Forwarded-Host is never honoured. 
+ /// + public string[] AllowedCustomHosts { get; set; } = []; } diff --git a/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs b/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs index 41cb740..da74253 100644 --- a/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs +++ b/src/TextServices.Search.Api/Configuration/ServiceCollectionExtensions.cs @@ -1,4 +1,7 @@ -using TextServices.Pdf; +using Microsoft.AspNetCore.HttpOverrides; +using Serilog; +using Serilog.Extensions.Logging; +using TextServices.Pdf; using TextServices.Search.Api.Features.Pdf; namespace TextServices.Search.Api.Configuration; @@ -20,4 +23,56 @@ public static IServiceCollection AddPdfServices(this IServiceCollection services return services; } + + /// + /// Configures host to use x-forwarded-proto to set httpContext.Request.Scheme + /// "KnownNetworks" (CIDR ranges) and/or "KnownProxies" (individual IPs) configuration keys restrict which + /// upstream sources are trusted. If neither is present, headers are accepted from all sources (with a warning). 
+ /// + public static IServiceCollection ConfigureForwardedHeaders(this IServiceCollection services, + IConfiguration configuration) + { + var knownNetworks = configuration.GetValue("KnownNetworks"); + var knownProxies = configuration.GetValue("KnownProxies"); + + var logger = new SerilogLoggerFactory(Log.Logger).CreateLogger("ServiceCollection"); + + return services.Configure(opts => + { + opts.ForwardedHeaders = ForwardedHeaders.XForwardedProto; + + var networks = knownNetworks.SplitSeparatedString(",").ToList(); + var proxies = knownProxies.SplitSeparatedString(",").ToList(); + + if (networks.Count == 0 && proxies.Count == 0) + { + logger.LogWarning("Forwarded header values accepted from all networks and proxies"); + opts.KnownIPNetworks.Clear(); + opts.KnownProxies.Clear(); + } + else + { + if (networks.Count > 0) + { + logger.LogInformation("Forwarded header values accepted from networks: {KnownNetworks}", knownNetworks); + foreach (var network in networks) + { + opts.KnownIPNetworks.Add(System.Net.IPNetwork.Parse(network)); + } + } + + if (proxies.Count > 0) + { + logger.LogInformation("Forwarded header values accepted from proxies: {KnownProxies}", knownProxies); + foreach (var proxy in proxies) + { + opts.KnownProxies.Add(System.Net.IPAddress.Parse(proxy)); + } + } + } + }); + } + + private static IEnumerable SplitSeparatedString(this string? str, string separator) + => str?.Split(separator, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) /* trim every entry, not just the whole string, so "a, b" yields values the IP parsers accept; whitespace-only entries are dropped */ ?? 
Enumerable.Empty(); } diff --git a/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs b/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs index 069b681..cc20fae 100644 --- a/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Annotations/AnnotationEndpoints.cs @@ -14,8 +14,8 @@ internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRoute IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"annotations/manifest/v1/{id}", null); - var result = await sender.Send(new ManifestAnnotationsRequest(id, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "annotations/manifest/v1/", id); + var result = await sender.Send(new ManifestAnnotationsRequest(id, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); @@ -26,8 +26,8 @@ internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRoute IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"annotations/lines/v1/{n}/{id}", null); - var result = await sender.Send(new LineAnnotationsRequest(id, n, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, $"annotations/lines/v1/{n}/", id); + var result = await sender.Send(new LineAnnotationsRequest(id, n, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); @@ -38,8 +38,8 @@ internal static IEndpointRouteBuilder MapAnnotationEndpoints(this IEndpointRoute IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"annotations/words/v1/{n}/{id}", null); - var result = await sender.Send(new WordAnnotationsRequest(id, n, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, 
$"annotations/words/v1/{n}/", id); + var result = await sender.Send(new WordAnnotationsRequest(id, n, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); diff --git a/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs b/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs index d1211dd..30b2ed9 100644 --- a/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Autocomplete/AutocompleteEndpoints.cs @@ -14,8 +14,8 @@ internal static IEndpointRouteBuilder MapAutocompleteEndpoints(this IEndpointRou IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"autocomplete/v1/{id}", q); - var result = await sender.Send(new AutocompleteRequest(id, q ?? string.Empty, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "autocomplete/v1/", id, q); + var result = await sender.Send(new AutocompleteRequest(id, q ?? string.Empty, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); @@ -26,8 +26,8 @@ internal static IEndpointRouteBuilder MapAutocompleteEndpoints(this IEndpointRou IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"autocomplete/v2/{id}", q); - var result = await sender.Send(new AutocompleteV2Request(id, q ?? string.Empty, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "autocomplete/v2/", id, q); + var result = await sender.Send(new AutocompleteV2Request(id, q ?? 
string.Empty, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); diff --git a/src/TextServices.Search.Api/Features/EndpointHelpers.cs b/src/TextServices.Search.Api/Features/EndpointHelpers.cs index 927f9f8..5e97e58 100644 --- a/src/TextServices.Search.Api/Features/EndpointHelpers.cs +++ b/src/TextServices.Search.Api/Features/EndpointHelpers.cs @@ -2,16 +2,47 @@ namespace TextServices.Search.Api.Features; +/// +/// Holds the resolved URL components for a single endpoint request, accounting for +/// X-Forwarded-Host and X-Forwarded-Path proxy headers. +/// +/// +/// Job id to use in generated IIIF URLs. Extracted from X-Forwarded-Path when a +/// whitelisted host is present; otherwise equals the original route id. +/// +/// Absolute URL for this endpoint response (base + route prefix + effective id + query). +/// Absolute URL without query string. Used as the base for child resource IDs (e.g. annotations). +/// Scheme + authority only (no path). Used by TextAugmented to build cross-endpoint service URLs. +internal record ResolvedRequest(string EffectiveId, string SelfUrl, string ResourceUrl, string BaseUrl); + internal static class EndpointHelpers { - internal static string BuildSelfUrl(SearchApiOptions opts, HttpContext ctx, string path, string? q) + /// + /// Resolves the effective id, self URL, and base URL for an endpoint in a single pass over + /// the forwarded headers. + /// + /// Current HTTP context object + /// + /// The path segment before {**id}, e.g. "search/v1/" or + /// "annotations/lines/v1/3/". Used to strip the prefix from + /// X-Forwarded-Path to extract the forwarded id. + /// + /// The job id from the route parameter. + /// Optional query term, appended as ?q=… on the self URL. + /// object + internal static ResolvedRequest Resolve(SearchApiOptions opts, HttpContext ctx, string routePrefix, + string originalId, string? 
q = null) { - var baseUrl = string.IsNullOrEmpty(opts.BaseUrl) - ? $"{ctx.Request.Scheme}://{ctx.Request.Host}" - : opts.BaseUrl.TrimEnd('/'); + var forwardedHost = ctx.Request.Headers["X-Forwarded-Host"].FirstOrDefault(); + var forwardedPath = ctx.Request.Headers["X-Forwarded-Path"].FirstOrDefault(); + + var baseUrl = ResolveBaseUrl(opts, ctx, forwardedHost); + var effectiveId = ResolveId(forwardedHost, forwardedPath, opts.AllowedCustomHosts, routePrefix, originalId); - var url = $"{baseUrl}/{path}"; - return string.IsNullOrWhiteSpace(q) ? url : $"{url}?q={Uri.EscapeDataString(q)}"; + var url = $"{baseUrl}/{routePrefix.TrimEnd('/')}/{effectiveId}"; + var selfUrl = string.IsNullOrWhiteSpace(q) ? url : $"{url}?q={Uri.EscapeDataString(q)}"; + + return new ResolvedRequest(effectiveId, selfUrl, url, baseUrl); } internal static string[]? GetIgnoredParams(HttpContext ctx) @@ -23,4 +54,41 @@ internal static string BuildSelfUrl(SearchApiOptions opts, HttpContext ctx, stri .ToArray(); return ignored.Length > 0 ? ignored : null; } + + private static string ResolveBaseUrl(SearchApiOptions opts, HttpContext ctx, string? forwardedHost) + { + /* The forwarded host only counts when it is on the allowlist. */ + var trusted = IsAllowedCustomHost(forwardedHost, opts.AllowedCustomHosts); + var configured = string.IsNullOrEmpty(opts.BaseUrl) ? null : opts.BaseUrl.TrimEnd('/'); + + if (!trusted) + { + /* Feature not in play: return exactly what the pre-forwarding code returned (configured BaseUrl kept verbatim, including any path base; never parsed, so it cannot throw). */ + return configured ?? $"{ctx.Request.Scheme}://{ctx.Request.Host}"; + } + + /* Trusted host: take the scheme from BaseUrl when it parses as an absolute URL, otherwise from the request. */ + var scheme = Uri.TryCreate(configured, UriKind.Absolute, out var baseUri) ? baseUri.Scheme : ctx.Request.Scheme; + return $"{scheme}://{forwardedHost}"; + } + + private static string ResolveId( + string? forwardedHost, string? 
forwardedPath, string[] allowlist, string routePrefix, string originalId) + { + if (!IsAllowedCustomHost(forwardedHost, allowlist)) return originalId; + if (string.IsNullOrEmpty(forwardedPath)) return originalId; + + var pathOnly = forwardedPath.Split('?')[0]; + var normalised = pathOnly.TrimStart('/'); + var prefix = routePrefix.TrimStart('/'); + + if (!normalised.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) return originalId; + + var extracted = normalised[prefix.Length..].TrimStart('/'); + return string.IsNullOrEmpty(extracted) ? originalId : extracted; + } + + private static bool IsAllowedCustomHost(string? host, string[] allowlist) + => !string.IsNullOrEmpty(host) + && allowlist.Contains(host, StringComparer.OrdinalIgnoreCase); } diff --git a/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs b/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs index 8373509..fcf84ed 100644 --- a/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Figures/FiguresEndpoints.cs @@ -14,8 +14,8 @@ internal static IEndpointRouteBuilder MapFiguresEndpoints(this IEndpointRouteBui IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"identified/figures/{id}", null); - var result = await sender.Send(new FiguresRequest(id, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "identified/figures/", id); + var result = await sender.Send(new FiguresRequest(id, resolved.SelfUrl)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); diff --git a/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs b/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs index b07404e..00df646 100644 --- a/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Pdf/PdfEndpoints.cs @@ -23,15 +23,12 @@ internal static 
IEndpointRouteBuilder MapPdfEndpoints(this IEndpointRouteBuilder HttpContext ctx) => { id = StripPdfExtension(id); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "pdf/v1/", id); var result = await sender.Send(new PdfTriggerRequest(id)); return result switch { - PdfTriggerResult.AlreadyExists => Results.Ok(new - { - location = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"pdf/v1/{id}", null) - }), - PdfTriggerResult.Queued => Results.Accepted( - EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"pdf/v1/{id}", null)), + PdfTriggerResult.AlreadyExists => Results.Ok(new { location = resolved.SelfUrl }), + PdfTriggerResult.Queued => Results.Accepted(resolved.SelfUrl), PdfTriggerResult.ServiceBusy => new ServiceBusyResult(), _ => Results.NotFound(), }; diff --git a/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs b/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs index dcf5a45..c6b480b 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchEndpoints.cs @@ -14,8 +14,8 @@ internal static IEndpointRouteBuilder MapSearchEndpoints(this IEndpointRouteBuil IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"search/v1/{id}", q); - var result = await sender.Send(new SearchRequest(id, q ?? string.Empty, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "search/v1/", id, q); + var result = await sender.Send(new SearchRequest(id, q ?? 
string.Empty, resolved.SelfUrl, resolved.ResourceUrl)); if (result == null) return Results.NotFound(); result.Ignored = EndpointHelpers.GetIgnoredParams(ctx); return Results.Json(result, contentType: "application/ld+json"); @@ -27,8 +27,8 @@ internal static IEndpointRouteBuilder MapSearchEndpoints(this IEndpointRouteBuil IOptions options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"search/v2/{id}", q); - var result = await sender.Send(new SearchV2Request(id, q ?? string.Empty, selfUrl)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "search/v2/", id, q); + var result = await sender.Send(new SearchV2Request(id, q ?? string.Empty, resolved.SelfUrl, resolved.ResourceUrl)); if (result == null) return Results.NotFound(); result.Ignored = EndpointHelpers.GetIgnoredParams(ctx); return Results.Json(result, contentType: "application/ld+json"); diff --git a/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs b/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs index b19257e..db9bf17 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchHandlerBase.cs @@ -6,19 +6,19 @@ namespace TextServices.Search.Api.Features.Search; public abstract class SearchHandlerBase(ITextCache cache) { - protected async Task HandleCore(string id, string query, string selfUrl, CancellationToken ct) + protected async Task HandleCore(string id, string query, string selfUrl, string resourceUrl, CancellationToken ct) { if (!await cache.IsEnabledAsync(id, JobServices.Search, ct)) return default; if (string.IsNullOrWhiteSpace(query)) - return EmptyQueryResponse(selfUrl); + return EmptyQueryResponse(selfUrl, resourceUrl); var text = await cache.GetTextAsync(id, ct); if (text == null) return default; - return BuildResponse(text, text.Search(query), selfUrl); + return BuildResponse(text, text.Search(query), selfUrl, resourceUrl); } - protected 
abstract TResponse EmptyQueryResponse(string selfUrl); - protected abstract TResponse BuildResponse(Text text, List rects, string selfUrl); + protected abstract TResponse EmptyQueryResponse(string selfUrl, string resourceUrl); + protected abstract TResponse BuildResponse(Text text, List rects, string selfUrl, string resourceUrl); } diff --git a/src/TextServices.Search.Api/Features/Search/SearchQuery.cs b/src/TextServices.Search.Api/Features/Search/SearchQuery.cs index fabe885..972fd4e 100644 --- a/src/TextServices.Search.Api/Features/Search/SearchQuery.cs +++ b/src/TextServices.Search.Api/Features/Search/SearchQuery.cs @@ -5,18 +5,18 @@ namespace TextServices.Search.Api.Features.Search; -public record SearchRequest(string Id, string Query, string SelfUrl) : IRequest; +public record SearchRequest(string Id, string Query, string SelfUrl, string ResourceUrl) : IRequest; public class SearchHandler(ITextCache cache) : SearchHandlerBase(cache), IRequestHandler { public Task Handle(SearchRequest request, CancellationToken ct) - => HandleCore(request.Id, request.Query, request.SelfUrl, ct); + => HandleCore(request.Id, request.Query, request.SelfUrl, request.ResourceUrl, ct); - protected override SearchAnnotationList EmptyQueryResponse(string selfUrl) => + protected override SearchAnnotationList EmptyQueryResponse(string selfUrl, string resourceUrl) => new() { Id = selfUrl, Within = new SearchLayer { Total = 0 }, Resources = [], Hits = [] }; - protected override SearchAnnotationList BuildResponse(Text text, List rects, string selfUrl) + protected override SearchAnnotationList BuildResponse(Text text, List rects, string selfUrl, string resourceUrl) { var resources = new List(rects.Count); var hits = new List(); @@ -29,7 +29,7 @@ protected override SearchAnnotationList BuildResponse(Text text, List; +public record SearchV2Request(string Id, string Query, string SelfUrl, string ResourceUrl) : IRequest; public class SearchV2Handler(ITextCache cache) : SearchHandlerBase(cache), 
IRequestHandler { public Task Handle(SearchV2Request request, CancellationToken ct) - => HandleCore(request.Id, request.Query, request.SelfUrl, ct); + => HandleCore(request.Id, request.Query, request.SelfUrl, request.ResourceUrl, ct); - protected override SearchAnnotationPageV2 EmptyQueryResponse(string selfUrl) => + protected override SearchAnnotationPageV2 EmptyQueryResponse(string selfUrl, string resourceUrl) => new() { Id = selfUrl, Items = [], Annotations = null }; - protected override SearchAnnotationPageV2 BuildResponse(Text text, List rects, string selfUrl) + protected override SearchAnnotationPageV2 BuildResponse(Text text, List rects, string selfUrl, string resourceUrl) { var items = new List(rects.Count); var contexts = new List(); @@ -40,13 +40,13 @@ protected override SearchAnnotationPageV2 BuildResponse(Text text, List options, HttpContext ctx) => { - var selfUrl = EndpointHelpers.BuildSelfUrl(options.Value, ctx, $"text-augmented/v3/{id}", null); - var searchBase = string.IsNullOrEmpty(options.Value.BaseUrl) - ? 
$"{ctx.Request.Scheme}://{ctx.Request.Host}" - : options.Value.BaseUrl.TrimEnd('/'); - - var result = await sender.Send(new TextAugmentedRequest(id, selfUrl, searchBase)); + var resolved = EndpointHelpers.Resolve(options.Value, ctx, "text-augmented/v3/", id); + var result = await sender.Send(new TextAugmentedRequest(id, resolved.SelfUrl, resolved.BaseUrl, UrlId: resolved.EffectiveId)); if (result == null) return Results.NotFound(); return Results.Json(result, contentType: "application/ld+json"); }); diff --git a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs index ecbdfd9..c295965 100644 --- a/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs +++ b/src/TextServices.Search.Api/Features/TextAugmented/TextAugmentedQuery.cs @@ -12,7 +12,13 @@ namespace TextServices.Search.Api.Features.TextAugmented; /// type "SearchService1" / "AutoCompleteService1" (IIIF Search 1, legacy) /// No @context is emitted inside service blocks — it belongs only at document level. /// -public record TextAugmentedRequest(string Id, string SelfUrl, string SearchBaseUrl) +/// Storage key used to load artefacts from the text store. +/// +/// Id to use when generating IIIF service URLs. Differs from when the +/// request arrived via a proxy that rewrites the path (X-Forwarded-Path). Defaults to +/// when null. +/// +public record TextAugmentedRequest(string Id, string SelfUrl, string SearchBaseUrl, string? UrlId = null) : IRequest; public class TextAugmentedHandler(ITextStore textStore, ITextCache textCache) @@ -35,12 +41,14 @@ public class TextAugmentedHandler(ITextStore textStore, ITextCache textCache) // Build service descriptors using Presentation 3 id/type conventions. // v2 is listed first; v1 follows for backward-compatible clients. + + // TODO - de-duplicate when adding services var base_ = request.SearchBaseUrl; - var id = request.Id; + var id = request.UrlId ?? 
request.Id; var searchServiceV2 = new JsonObject { - ["id"] = $"{base_}/search/v2/{id}", + ["id"] = $"{base_}/search/v2/{id}", // TODO - can we build these from a central place? ["type"] = "SearchService2", ["service"] = new JsonArray(new JsonObject { diff --git a/src/TextServices.Search.Api/Program.cs b/src/TextServices.Search.Api/Program.cs index a339c4e..5546457 100644 --- a/src/TextServices.Search.Api/Program.cs +++ b/src/TextServices.Search.Api/Program.cs @@ -56,6 +56,7 @@ // ---- Configuration ---------------------------------------------------------- builder.Services.Configure(builder.Configuration.GetSection("TextServices")); +builder.Services.ConfigureForwardedHeaders(builder.Configuration); // ---- Storage ---------------------------------------------------------------- @@ -96,6 +97,7 @@ if (app.Environment.IsDevelopment()) app.MapOpenApi(); +app.UseForwardedHeaders(); app.UseMiddleware(); app.UseSerilogRequestLogging(opts => opts.GetLevel = (ctx, _, _) => diff --git a/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs b/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs index 4144a0f..1e4ecbd 100644 --- a/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs +++ b/src/TextServices.Tests/SearchApi/CapabilityGatingTests.cs @@ -33,7 +33,7 @@ public async Task Search_WhenSearchFlagAbsent_ReturnsNull() var handler = new SearchHandler(new StubTextCache(JobServices.Autocomplete)); var result = await handler.Handle( - new SearchRequest(Id, "hello", SelfUrl), CancellationToken.None); + new SearchRequest(Id, "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldBeNull(); } @@ -45,7 +45,7 @@ public async Task Search_WhenCapabilitiesNull_ProceedsToTextLookup() var handler = new SearchHandler(new StubTextCache(null)); var result = await handler.Handle( - new SearchRequest(Id, "hello", SelfUrl), CancellationToken.None); + new SearchRequest(Id, "hello", SelfUrl, SelfUrl), CancellationToken.None); // Gating passed; null result because text 
is absent, not because of gating. result.ShouldBeNull(); diff --git a/src/TextServices.Tests/SearchApi/EndpointHelpersTests.cs b/src/TextServices.Tests/SearchApi/EndpointHelpersTests.cs new file mode 100644 index 0000000..d590b39 --- /dev/null +++ b/src/TextServices.Tests/SearchApi/EndpointHelpersTests.cs @@ -0,0 +1,165 @@ +using Microsoft.AspNetCore.Http; +using Shouldly; +using TextServices.Search.Api.Configuration; +using TextServices.Search.Api.Features; + +namespace TextServices.Tests.SearchApi; + +public class EndpointHelpersTests +{ + // ------------------------------------------------------------------------- + // Resolve — no forwarding (baseline) + // ------------------------------------------------------------------------- + + [Fact] + public void Resolve_NoForwardedHeaders_UsesBaseUrlAndOriginalId() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("2/cc/123"); + result.SelfUrl.ShouldBe("https://canonical.search/search/v1/2/cc/123"); + result.BaseUrl.ShouldBe("https://canonical.search"); + } + + [Fact] + public void Resolve_NoBaseUrl_UsesRequestSchemeAndHost() + { + var opts = Opts(); + var result = EndpointHelpers.Resolve(opts, Context(scheme: "https", host: "request.host"), "search/v1/", "a/b"); + result.SelfUrl.ShouldBe("https://request.host/search/v1/a/b"); + result.BaseUrl.ShouldBe("https://request.host"); + } + + [Fact] + public void Resolve_QueryParam_AppendedToSelfUrl() + { + var opts = Opts(baseUrl: "https://canonical.search"); + var result = EndpointHelpers.Resolve(opts, Context(), "search/v1/", "a/b", "hello world"); + result.SelfUrl.ShouldBe("https://canonical.search/search/v1/a/b?q=hello%20world"); + } + + // ------------------------------------------------------------------------- + // Resolve — X-Forwarded-Host only + // 
------------------------------------------------------------------------- + + [Fact] + public void Resolve_KnownForwardedHost_ReplacesHostKeepsOriginalId() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("2/cc/123"); + result.SelfUrl.ShouldBe("https://known.host/search/v1/2/cc/123"); + result.BaseUrl.ShouldBe("https://known.host"); + } + + [Fact] + public void Resolve_UnknownForwardedHost_Ignored() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "evil.com"), "search/v1/", "2/cc/123"); + result.SelfUrl.ShouldBe("https://canonical.search/search/v1/2/cc/123"); + result.BaseUrl.ShouldBe("https://canonical.search"); + } + + [Fact] + public void Resolve_EmptyAllowlist_ForwardedHostIgnored() + { + var opts = Opts(baseUrl: "https://canonical.search"); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host"), "search/v1/", "2/cc/123"); + result.BaseUrl.ShouldBe("https://canonical.search"); + } + + // ------------------------------------------------------------------------- + // Resolve — X-Forwarded-Host + X-Forwarded-Path (id extraction) + // ------------------------------------------------------------------------- + + [Fact] + public void Resolve_KnownHostAndPath_ExtractsIdFromPath() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: "search/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("cc/123"); + result.SelfUrl.ShouldBe("https://known.host/search/v1/cc/123"); + } + + [Fact] + public void Resolve_ForwardedPathWithLeadingSlash_Handled() + { + var opts = Opts(baseUrl: "https://canonical.search", 
allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: "/search/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("cc/123"); + } + + [Fact] + public void Resolve_ForwardedPathWithQueryString_QueryStripped() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: "search/v1/cc/123?q=test"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("cc/123"); + result.SelfUrl.ShouldNotContain("test"); + } + + [Fact] + public void Resolve_ForwardedPathPrefixMismatch_ReturnsOriginalId() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: "autocomplete/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("2/cc/123"); + } + + [Fact] + public void Resolve_ForwardedPathButUnknownHost_PathIgnored() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "unknown.host", forwardedPath: "search/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("2/cc/123"); + result.BaseUrl.ShouldBe("https://canonical.search"); + } + + [Fact] + public void Resolve_ForwardedPathWithNoHost_PathIgnored() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedPath: "search/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("2/cc/123"); + } + + [Fact] + public void Resolve_KnownHostCaseInsensitive() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["Known.Host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: 
"search/v1/cc/123"), "search/v1/", "2/cc/123"); + result.EffectiveId.ShouldBe("cc/123"); + } + + [Fact] + public void Resolve_TextAugmentedRoute_ExtractsId() + { + var opts = Opts(baseUrl: "https://canonical.search", allowedHosts: ["known.host"]); + var result = EndpointHelpers.Resolve(opts, Context(forwardedHost: "known.host", forwardedPath: "text-augmented/v3/cc/123"), "text-augmented/v3/", "2/cc/123"); + result.EffectiveId.ShouldBe("cc/123"); + result.SelfUrl.ShouldBe("https://known.host/text-augmented/v3/cc/123"); + result.BaseUrl.ShouldBe("https://known.host"); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private static SearchApiOptions Opts(string baseUrl = "", string[]? allowedHosts = null) => + new() { BaseUrl = baseUrl, AllowedCustomHosts = allowedHosts ?? [] }; + + private static HttpContext Context( + string scheme = "http", + string host = "localhost", + string? forwardedHost = null, + string? 
forwardedPath = null) + { + var ctx = new DefaultHttpContext(); + ctx.Request.Scheme = scheme; + ctx.Request.Host = new HostString(host); + if (forwardedHost != null) ctx.Request.Headers["X-Forwarded-Host"] = forwardedHost; + if (forwardedPath != null) ctx.Request.Headers["X-Forwarded-Path"] = forwardedPath; + return ctx; + } +} diff --git a/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs b/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs index dd0635d..589b0ec 100644 --- a/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs +++ b/src/TextServices.Tests/SearchApi/SearchHandlerTests.cs @@ -27,7 +27,7 @@ public async Task Handle_EmptyQuery_ReturnsEmptyResponse() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Resources.ShouldBeEmpty(); @@ -42,7 +42,7 @@ public async Task Handle_QueryNotFound_ReturnsEmptyResponse() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "parliament", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "parliament", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Resources.ShouldBeEmpty(); @@ -55,7 +55,7 @@ public async Task Handle_TextNotFound_ReturnsNull() var handler = new SearchHandler(new StubTextCache(null)); var result = await handler.Handle( - new SearchRequest("missing/book", "hello", SelfUrl), CancellationToken.None); + new SearchRequest("missing/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldBeNull(); } @@ -72,7 +72,7 @@ public async Task Handle_SingleHit_CorrectAnnotationShape() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "quick", SelfUrl), 
CancellationToken.None); + new SearchRequest("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Resources.Count.ShouldBe(1); @@ -94,7 +94,7 @@ public async Task Handle_SingleHit_CorrectHitShape() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "quick", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); result!.Hits.Count.ShouldBe(1); @@ -115,7 +115,7 @@ public async Task Handle_ResponseHasCorrectId() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result!.Id.ShouldBe(SelfUrl); result.Context.ShouldBe("http://iiif.io/api/search/1/context.json"); @@ -130,13 +130,30 @@ public async Task Handle_MultipleHits_CorrectCount() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "the", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "the", SelfUrl, SelfUrl), CancellationToken.None); result!.Resources.Count.ShouldBe(2); result.Hits.Count.ShouldBe(2); result.Within.Total.ShouldBe(2); } + [Fact] + public async Task Handle_WithQueryInSelfUrl_AnnotationIdDoesNotContainQueryString() + { + var selfUrlWithQuery = $"{SelfUrl}?q=quick"; + var text = BuildText([("https://example.org/c/1", 1000, 1500, "the quick brown fox")]); + var handler = new SearchHandler(new StubTextCache(text)); + + var result = await handler.Handle( + new SearchRequest("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); + + result.ShouldNotBeNull(); + result.Id.ShouldBe(selfUrlWithQuery); + var annoId = result.Resources[0].Id; + annoId.ShouldNotContain("?q="); + annoId.ShouldStartWith(SelfUrl + 
"/anno/"); + } + [Fact] public async Task Handle_MultiPage_AnnotationsReferenceCorrectCanvas() { @@ -149,7 +166,7 @@ public async Task Handle_MultiPage_AnnotationsReferenceCorrectCanvas() var handler = new SearchHandler(new StubTextCache(text)); var result = await handler.Handle( - new SearchRequest("test/book", "parliament", SelfUrl), CancellationToken.None); + new SearchRequest("test/book", "parliament", SelfUrl, SelfUrl), CancellationToken.None); result!.Resources.Count.ShouldBe(1); result.Resources[0].On.ShouldStartWith(canvas2); diff --git a/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs b/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs index 0cd9fa5..558c993 100644 --- a/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs +++ b/src/TextServices.Tests/SearchApi/SearchV2HandlerTests.cs @@ -26,7 +26,7 @@ public async Task Handle_TextNotFound_ReturnsNull() var handler = new SearchV2Handler(new StubTextCache(null)); var result = await handler.Handle( - new SearchV2Request("missing/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("missing/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldBeNull(); } @@ -38,7 +38,7 @@ public async Task Handle_EmptyQuery_ReturnsEmptyResponse() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Items.ShouldBeEmpty(); @@ -56,7 +56,7 @@ public async Task Handle_TemporalHit_MotivationIsSupplementing() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Items.Count.ShouldBe(1); @@ -71,7 +71,7 @@ 
public async Task Handle_TemporalHit_TargetUsesTemporalFragment() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); var target = result.Items[0].Target; @@ -87,7 +87,7 @@ public async Task Handle_TemporalHit_FragmentUsesDecimalPoint() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); var target = result.Items[0].Target; @@ -106,7 +106,7 @@ public async Task Handle_TemporalHit_TimesAreInSeconds() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); var target = result.Items[0].Target; @@ -125,7 +125,7 @@ public async Task Handle_SpatialHit_MotivationIsPainting() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "quick", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "quick", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Items.Count.ShouldBe(1); @@ -134,6 +134,42 @@ public async Task Handle_SpatialHit_MotivationIsPainting() result.Items[0].Target.ShouldNotContain("#t="); } + [Fact] + public async Task Handle_WithQueryInSelfUrl_AnnotationIdDoesNotContainQueryString() + { + var selfUrlWithQuery = $"{SelfUrl}?q=quick"; + var canvasId = "https://example.org/c/1"; + var text = BuildSpatialText([(canvasId, 1000, 1500, "the quick brown fox")]); 
+ var handler = new SearchV2Handler(new StubTextCache(text)); + + var result = await handler.Handle( + new SearchV2Request("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); + + result.ShouldNotBeNull(); + result.Id.ShouldBe(selfUrlWithQuery); + var annoId = result.Items[0].Id; + annoId.ShouldNotContain("?q="); + annoId.ShouldStartWith(SelfUrl + "/anno/"); + } + + [Fact] + public async Task Handle_WithQueryInSelfUrl_ContextualizingAnnotationIdDoesNotContainQueryString() + { + var selfUrlWithQuery = $"{SelfUrl}?q=quick"; + var canvasId = "https://example.org/c/1"; + var text = BuildSpatialText([(canvasId, 1000, 1500, "the quick brown fox")]); + var handler = new SearchV2Handler(new StubTextCache(text)); + + var result = await handler.Handle( + new SearchV2Request("test/book", "quick", selfUrlWithQuery, SelfUrl), CancellationToken.None); + + result.ShouldNotBeNull(); + result.Annotations.ShouldNotBeNull(); + var contextId = result.Annotations![0].Items[0].Id; + contextId.ShouldNotContain("?q="); + contextId.ShouldStartWith(SelfUrl + "/context/"); + } + // ------------------------------------------------------------------------- // Contextualizing annotations // ------------------------------------------------------------------------- @@ -146,7 +182,7 @@ public async Task Handle_TemporalHit_ContextualizingAnnotationPresent() var handler = new SearchV2Handler(new StubTextCache(text)); var result = await handler.Handle( - new SearchV2Request("test/book", "hello", SelfUrl), CancellationToken.None); + new SearchV2Request("test/book", "hello", SelfUrl, SelfUrl), CancellationToken.None); result.ShouldNotBeNull(); result.Annotations.ShouldNotBeNull();