diff --git a/.github/agents/Tester.agent.md b/.github/agents/Tester.agent.md index 878eee96..fd06cf20 100644 --- a/.github/agents/Tester.agent.md +++ b/.github/agents/Tester.agent.md @@ -43,6 +43,10 @@ Follow the Testing Trophy (Kent C. Dodds), not the Test Pyramid: - **Static analysis** (linters, sanitizers, warnings-as-errors) is always on - **Tests that check output must assert expected values**, not just check non-empty — validate correctness +### Real integration tests for public-API surface (non-negotiable) + +Any new behavior reachable through the public SDK API (`Model::Load`/`Unload`, `Session`, `ChatSession`, catalog ops, web service endpoints) **must** get a "real" integration test under `test/sdk_api/` (the `sdk_integration_tests` binary) that drives the *production entry point* end-to-end against the real backend (real cached model and/or a real in-process `WebService`). An `internal_api` unit test that calls an internal class directly, or stubs the other side of a contract that also lives in this repo, is **not** a substitute. A one-sided unit test passes while the production path is broken — PR #839 proved it (router unit test called `router->Load(alias)` and passed, while `Model::Load` sending a model_id 404'd against the real service). When a feature spans a client/server or local/external boundary, write the round-trip test through the real server, with the same identifiers production uses. + ### Test Patterns for This Project - Disabled live tests use `DISABLED_` prefix, run via `--gtest_also_run_disabled_tests` diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 35c05b04..eeb03428 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -34,6 +34,8 @@ This is a multi-language SDK monorepo. The primary SDKs live under: - Test binaries are in `sdk_v2/cpp/build/Windows/RelWithDebInfo/bin/RelWithDebInfo/` - `sdk_integration_tests.exe` — integration tests (loads models via SharedTestEnv, slow startup) - `foundry_local_tests.exe` — unit tests (no model loading, fast) +- `cache_only_tests.exe` — cache-only / external-service-URL public-API tests (`test/sdk_api/`, separate binary because `Manager` is a singleton) +- `external_mode_tests.exe` — client/server split tests; re-execs itself with `--serve` to host the web service (`test/sdk_api/`, separate binary for the same singleton reason) ## C++ Code Style diff --git a/.github/instructions/cpp-testing.instructions.md b/.github/instructions/cpp-testing.instructions.md index 770314ea..eb43cb79 100644 --- a/.github/instructions/cpp-testing.instructions.md +++ b/.github/instructions/cpp-testing.instructions.md @@ -27,13 +27,26 @@ If a model role prints `(none)`, the test fixtures for that role will skip. Chec ## Test Binary Quick Reference -| Binary | Type | Startup | Count | -|--------|------|---------|-------| -| `foundry_local_tests.exe` | Unit | Fast (no models) | ~700 | -| `sdk_integration_tests.exe` | Integration | Slow (model loading) | ~100 | +| Binary | Type | Source dir | Startup | Count | +|--------|------|-----------|---------|-------| +| `foundry_local_tests.exe` | Unit / internal API | `test/internal_api/` (+ pure-logic `test/sdk_api/`) | Fast (no models) | ~700 | +| `sdk_integration_tests.exe` | Integration / public API | `test/sdk_api/` | Slow (model loading, real web service) | ~100 | +| `cache_only_tests.exe` | Integration / public API (cache-only + external service URL) | `test/sdk_api/` | Moderate (own `Manager` configs) | ~10 | +| `external_mode_tests.exe` | Integration / public API (client/server split) | `test/sdk_api/` | Slow (spawns a `--serve` host child, loads a model) | 2 | + +`cache_only_tests` and `external_mode_tests` live in `test/sdk_api/` but build into their own binaries because `Manager` is a singleton — each needs `Manager` instances configured differently than the shared `SharedTestEnv` host, so they cannot coexist in `sdk_integration_tests`' process. Source location tracks the API surface (public, black-box); the binary split is an orthogonal CMake concern. Always use `--gtest_filter` when iterating on a specific test or small group. The full integration suite takes ~10 minutes. +## Real integration tests are mandatory for public-API surface + +Any new behavior reachable through the public SDK API — `Model::Load`/`Unload`/`IsLoaded`, `Session`/`ChatSession`, catalog operations, the web service endpoints, etc. — **must** get a "real" integration test under `test/sdk_api/` (the `sdk_integration_tests` binary) that drives the *production entry point* end-to-end against the real backend (real cached model and/or a real in-process `WebService`). An `internal_api` unit test that exercises an internal class directly, or stubs the other side of a contract, is **not** a substitute and does not satisfy this requirement. + +Rules: +- When a feature spans a client/server or local/external boundary that both live in this repo, write a **round-trip test that goes through the real server**, not a stub. Stubbed boundary tests are fine *in addition*, never *instead*. +- Drive the same entry point the user calls (`Model::Load()`, not the internal router/manager) so the test exercises identifier translation, serialization, and endpoint resolution exactly as production does. +- Use realistic identifiers. If production passes a model_id, the test must pass a model_id — not an alias that happens to resolve. + ## Inner-loop workflow for a single test When verifying a fix or a new test, never re-run the whole suite via `ctest`. The fast path is: diff --git a/sdk_v2/cpp/CMakeLists.txt b/sdk_v2/cpp/CMakeLists.txt index 7ac905e6..b02d07eb 100644 --- a/sdk_v2/cpp/CMakeLists.txt +++ b/sdk_v2/cpp/CMakeLists.txt @@ -168,6 +168,7 @@ set(FOUNDRY_LOCAL_SOURCES src/inferencing/model_load_manager.cc src/inferencing/generative/chat/onnx_chat_generator.cc src/model.cc + src/model_command_router.cc src/inferencing/generative/openresponses/response_converter.cc src/inferencing/generative/openresponses/response_store.cc src/contracts/responses_json.cc diff --git a/sdk_v2/cpp/docs/ModelCommandRoutingPlan.md b/sdk_v2/cpp/docs/ModelCommandRoutingPlan.md new file mode 100644 index 00000000..eee2ddf2 --- /dev/null +++ b/sdk_v2/cpp/docs/ModelCommandRoutingPlan.md @@ -0,0 +1,211 @@ +# Model Command Routing (External Service Mode) — C++ Implementation Plan + +## Overview + +When the SDK is configured with `Configuration::external_service_url`, the three +**model management commands** must be redirected to that remote Foundry Local +service over HTTP instead of being executed against the in-process model loader: + +| Command | External endpoint | +|-------------|--------------------------------------------| +| List loaded | `GET {url}/models/loaded` | +| Load | `GET {url}/models/load/{url-encoded id}` | +| Unload | `GET {url}/models/unload/{url-encoded id}` | + +The OpenAI-compatible endpoints (`/v1/models`, `/v1/chat/completions`, …) are +**out of scope** — callers reach those directly, not through the SDK. + +In the v1 SDKs this redirection was reimplemented in every language binding +(`ModelLoadManager.cs`, `model_load_manager.py`, `modelLoadManager.ts`, +`model_load_manager.rs`). In v2 it is handled **once** in the C++ core so every +binding inherits it for free. + +**v1 reference implementation:** +- `sdk/cs/src/Detail/ModelLoadManager.cs` +- `sdk/python/src/detail/model_load_manager.py` +- `sdk/js/src/detail/modelLoadManager.ts` +- `sdk/rust/src/detail/model_load_manager.rs` + +--- + +## Why a new component (not augmenting `ModelLoadManager`) + +The v2 `ModelLoadManager` +([`inferencing/model_load_manager.h`](../src/inferencing/model_load_manager.h)) is +**not** the v1 `ModelLoadManager`. Despite the shared name they have completely +different responsibilities: + +- **v1 `ModelLoadManager`** — a thin *router*: three methods, each branching + `if (external_url) HTTP else core`. +- **v2 `ModelLoadManager`** — the *local ORT GenAI lifecycle owner*: holds the + `GenAIModelInstance` map, an `IEpDetector&`, per-model session refcounts, + `RejectNewLoads()`, and `UnloadAll()` drain-on-shutdown. + +Bolting `external_service_url` + HTTP into the v2 class would conflate two +unrelated reasons-to-change ("own in-process ORT GenAI instances" vs. "talk to a +remote server") and its signatures fight it: `LoadModel` returns +`LoadResult{ GenAIModelInstance* }` and `GetLoadedModel` returns a live local +instance — neither exists in external mode. `UnloadAll`, `RejectNewLoads`, EP +detection, and session drain are all meaningless remotely. + +**Decision:** introduce a higher-level façade, `ModelCommandRouter`, that owns the +local-vs-external decision for only the three management commands. The v2 +`ModelLoadManager` stays a pure local lifecycle owner. + +--- + +## Architecture + +``` +flModel.Load/Unload flCatalog.GetLoadedModels + │ │ + ▼ ▼ + Model::Load/Unload/IsLoaded BaseModelCatalog::GetLoadedModels + │ │ + └──────────────┬───────────────┘ + ▼ + ModelCommandRouter ← owns local-vs-external decision + ├── local ──► ModelLoadManager (LoadModel / UnloadModel / GetLoadedModelIds) + └── external ─► http_client → {external_service_url}/models/... +``` + +### Router interface + +```cpp +class ModelCommandRouter { + public: + ModelCommandRouter(std::optional external_service_url, + ModelLoadManager& load_manager, + std::string app_name, + ILogger& logger); + + // Local: ModelLoadManager::LoadModel(local_path, model_id, ep) + // External: GET /models/load/{id} + void Load(std::string_view model_id, std::string_view local_path, ExecutionProvider ep); + + // Local: ModelLoadManager::UnloadModel(model_id) + // External: GET /models/unload/{id} + void Unload(std::string_view model_id); + + // Local: ModelLoadManager::GetLoadedModel(model_id) != nullptr + // External: membership test against ListLoadedModelIds() + bool IsLoaded(std::string_view model_id); + + // Local: ModelLoadManager loaded-map keys + // External: GET /models/loaded (single round-trip, JSON array of ids) + std::vector ListLoadedModelIds(); +}; +``` + +### Key design principles + +1. **`Model` and the catalog become mode-agnostic.** They call the router; they no + longer know external mode exists. `Model` swaps its `ModelLoadManager*` member for + a `ModelCommandRouter*`, wired in the same `Model::Create` / `Manager::CreateModel` + factory that wires it today. +2. **`ModelLoadManager` gains no mode logic.** The router *holds a + `ModelLoadManager&`* and delegates the local branch to it. The only addition to + `ModelLoadManager` is one in-character method, `GetLoadedModelIds()`, that reads + its own loaded map. +3. **List is a single batch call — no N-calls.** + `BaseModelCatalog::GetLoadedModels()` is rewritten to call + `router.ListLoadedModelIds()` **once** and filter `models_` by set membership, + instead of N per-model `IsLoaded()` calls. This also fixes a latent bug: in + external mode the current per-model `IsLoaded()` path always returns empty. +4. **Local inference is untouched.** External mode means "run inference via the + external HTTP endpoints." The inference path fetches `GenAIModelInstance*` directly + in the session layer, never through `Model`/the router, so it needs no change. +5. **Reuse existing infrastructure.** + [`http/http_client.h`](../src/http/http_client.h) (`HttpGetWithResponse`, + `DescribeFailure`) for HTTP + errors, the shared `UrlEncode` helper in + [`utils.h`](../src/utils.h) for the `{id}` path segment, and `Configuration::app_name` for the + `User-Agent`. + +--- + +## Wiring (`Manager`) + +`Manager` owns a `std::unique_ptr`, declared **after** +`model_load_manager_` so it is destroyed first. Construction order: + +``` +model_load_manager_ → model_command_router_ → catalog_ +``` + +The router is constructed with `config_.external_service_url`, +`*model_load_manager_`, `config_.app_name`, and `*logger_`. It is injected into the +catalog ctor and forwarded into each `Model` via the existing `CreateModel` +factory (replacing the `ModelLoadManager&` argument). + +--- + +## Error handling + +External calls use `http::HttpGetWithResponse` and throw on non-2xx / transport +failure via `FL_THROW(FOUNDRY_LOCAL_ERROR_NETWORK, http::DescribeFailure(resp))`, +matching v1 semantics (which threw `FoundryLocalException` on `!IsSuccessStatusCode`). +`ListLoadedModelIds` parses the response body as a JSON array of strings; an empty +body yields an empty list. + +--- + +## Open questions — decisions + +1. **Load of an un-cached model in external mode.** + *Decision: accept the cache-only constraint.* The v2 API is model-object-centric + and the catalog is cache-only in external mode, so callers can only `Load()` a model + already present in their local cache. We ship the model-object path now and **defer** + any `Manager::LoadModelById(id)` escape hatch until there is a concrete need. (v1 + took raw id strings and could load anything the remote had; that capability is + intentionally not reproduced yet.) + +2. **Session-creation guard in external mode.** + *Decision: add an explicit guard.* [`configuration.h`](../src/configuration.h) + documents that session creation is blocked in external mode, but `Session_CreateImpl` + currently has no explicit check — it only fails indirectly (no local instance). Add + an explicit `FL_THROW(FOUNDRY_LOCAL_ERROR_INVALID_USAGE, …)` guard on the + session-creation path when `external_service_url` is set, mirroring the existing + `StartWebService` guard, so the failure is clear and intentional. + +3. **Endpoint timeout.** + *Decision: drop the v1 `timeout` query param; set a deliberate client-side timeout + in the core instead.* Investigation of the v1 bindings showed the `timeout` query + param was **never implemented** — it exists only as a commented-out TODO in C# + (`// { "timeout", ... }`) and Python (`# "timeout": "30"`), and is absent in JS/Rust. + So there is no real capability to preserve. The client-side HTTP timeout was also + inconsistent and accidental rather than designed: Python hard-coded **10s** on all + three calls (effectively a latent bug — a real model load routinely exceeds 10s), + C# used the `HttpClient` **default 100s**, and JS/Rust were unbounded. + + Moving this into the C++ core gives every binding a *single, deliberate* value for + the first time. Use a generous `HttpRequestOptions.timeout` for the load call + (target ~5 minutes — model load can be slow) and the default for list/unload. We do + **not** reproduce Python's 10s. A future server-side `timeout` query param can be + added later if the remote service ever supports one. + +--- + +## Work breakdown + +**Implementation (`@CppCoder`)** +- New `src/model_command_router.{h,cc}`. +- Add `ModelLoadManager::GetLoadedModelIds()` (reads its own loaded map). +- Swap `Model`'s member `ModelLoadManager* → ModelCommandRouter*`; update + `Model::Create` / `Manager::CreateModel` wiring. +- Rewrite `BaseModelCatalog::GetLoadedModels()` to the single-batch path; thread the + router through the catalog ctor. +- Construct/own the router in `Manager` with correct declaration order. +- Add the external-mode session-creation guard (open question #2). + +**Tests (`@Tester`)** +- Router unit tests covering both branches (mock/loopback the HTTP side). +- External-mode integration test mirroring v1 `TestModelLoadManagerExternalService`: + start a real local web service, point a second `Manager` at it via + `external_service_url`, verify load / unload / list round-trip. +- Regression test that `GetLoadedModels()` is correct in external mode. +- Test that session creation throws in external mode. + +**Review (`@Reviewer`)** +- Confirm `ModelLoadManager` gained no mode logic. +- Confirm the router holds no inference/EP concerns. +- Confirm error/ownership semantics match SDK conventions. diff --git a/sdk_v2/cpp/run_coverage.ps1 b/sdk_v2/cpp/run_coverage.ps1 index 7c53012f..eafb937c 100644 --- a/sdk_v2/cpp/run_coverage.ps1 +++ b/sdk_v2/cpp/run_coverage.ps1 @@ -37,7 +37,16 @@ if (-not (Test-Path $opencpp)) { $buildDir = Join-Path $PSScriptRoot "build\Windows\$Config" $binDir = Join-Path $buildDir "bin\$Config" $unitExe = Join-Path $binDir "foundry_local_tests.exe" -$integExe = Join-Path $binDir "sdk_integration_tests.exe" + +# Integration coverage comes from every binary built from test/sdk_api/ sources. They all link the +# public foundry_local_cpp surface and drive real models / a real web service. Manager being a +# singleton forces the public-API suite to be split across several binaries (default, cache-only, +# external-mode client/server) rather than one — so iterate over all of them. +$integExes = @( + (Join-Path $binDir "sdk_integration_tests.exe") + (Join-Path $binDir "cache_only_tests.exe") + (Join-Path $binDir "external_mode_tests.exe") +) $srcDir = Join-Path $PSScriptRoot "src" $covDir = Join-Path $buildDir "coverage" @@ -84,13 +93,23 @@ try { $mergeArgs = @("--input_coverage", $unitCov) if (-not $SkipIntegration) { - if (-not (Test-Path $integExe)) { - Write-Warning "Integration test binary not found: $integExe - skipping integration coverage." - } else { - Write-Host "`n=== Collecting integration test coverage ===" -ForegroundColor Cyan + foreach ($integExe in $integExes) { + $name = [System.IO.Path]::GetFileNameWithoutExtension($integExe) + + if (-not (Test-Path $integExe)) { + Write-Warning "Integration test binary not found: $integExe - skipping." + continue + } + + Write-Host "`n=== Collecting integration test coverage: $name ===" -ForegroundColor Cyan + + $integCov = Join-Path $covDir "$name.cov" - $integCov = Join-Path $covDir "integration.cov" - $integArgs = $commonArgs + @("--export_type", "binary:$integCov", "--") + # --cover_children: external_mode_tests re-execs itself with `--serve` to host the web + # service in a child process — the src/ code under test (web_service, manager shutdown) + # runs there, not in the parent. Without this its coverage would be lost. Harmless for + # the binaries that don't spawn children. + $integArgs = $commonArgs + @("--cover_children", "--export_type", "binary:$integCov", "--") $integArgs += $integExe # Include DISABLED_ tests (e.g. download tests) for maximum coverage. @@ -103,9 +122,9 @@ try { & $opencpp @integArgs if (Test-Path $integCov) { $mergeArgs += @("--input_coverage", $integCov) - Write-Host "Integration coverage saved to $integCov" -ForegroundColor Green + Write-Host "$name coverage saved to $integCov" -ForegroundColor Green } else { - Write-Warning "Integration coverage file was not created - continuing with unit only." + Write-Warning "$name coverage file was not created - continuing without it." } } } diff --git a/sdk_v2/cpp/src/c_api.cc b/sdk_v2/cpp/src/c_api.cc index 076a480c..225084e5 100644 --- a/sdk_v2/cpp/src/c_api.cc +++ b/sdk_v2/cpp/src/c_api.cc @@ -1662,6 +1662,13 @@ FL_API_STATUS_IMPL(Session_CreateImpl, const flModel* model, flSession** out_ses return MakeStatus(FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT, "null argument"); } + // External mode runs inference via the remote HTTP endpoints, not through a local session. + // Fail fast with a clear message instead of failing indirectly when no local instance exists. + if (fl::Manager::Instance().GetConfiguration().external_service_url.has_value()) { + return MakeStatus(FOUNDRY_LOCAL_ERROR_INVALID_USAGE, + "cannot create a local inference session when external_service_url is configured"); + } + auto session = fl::Session::Create(*AsImpl(model)); *out_session = AsHandle(session.release()); return nullptr; diff --git a/sdk_v2/cpp/src/catalog/azure_model_catalog.cc b/sdk_v2/cpp/src/catalog/azure_model_catalog.cc index 430e348a..052fb2b8 100644 --- a/sdk_v2/cpp/src/catalog/azure_model_catalog.cc +++ b/sdk_v2/cpp/src/catalog/azure_model_catalog.cc @@ -16,12 +16,13 @@ namespace fl { AzureModelCatalog::AzureModelCatalog(std::vector>> catalog_urls, std::string cache_dir, ModelFactory model_factory, + ModelCommandRouter& router, const IEpDetector& ep_detector, ILogger& logger, bool cache_only, std::string catalog_region, bool disable_region_fallback) - : BaseModelCatalog(catalog_urls.empty() ? kDefaultCatalogUrl : catalog_urls.front().first, logger), + : BaseModelCatalog(catalog_urls.empty() ? kDefaultCatalogUrl : catalog_urls.front().first, router, logger), catalog_urls_(std::move(catalog_urls)), cache_dir_(std::move(cache_dir)), model_factory_(std::move(model_factory)), diff --git a/sdk_v2/cpp/src/catalog/azure_model_catalog.h b/sdk_v2/cpp/src/catalog/azure_model_catalog.h index 9d837566..cac72493 100644 --- a/sdk_v2/cpp/src/catalog/azure_model_catalog.h +++ b/sdk_v2/cpp/src/catalog/azure_model_catalog.h @@ -14,6 +14,8 @@ namespace fl { +class ModelCommandRouter; + /// Azure-specific catalog. Fetches from Azure Foundry catalog API, /// scans local cache, merges results. /// Maps to C# AzureModelCatalog. @@ -24,6 +26,7 @@ class AzureModelCatalog : public BaseModelCatalog { AzureModelCatalog(std::vector>> catalog_urls, std::string cache_dir, ModelFactory model_factory, + ModelCommandRouter& router, const IEpDetector& ep_detector, ILogger& logger, bool cache_only = false, diff --git a/sdk_v2/cpp/src/catalog/base_model_catalog.cc b/sdk_v2/cpp/src/catalog/base_model_catalog.cc index af8b78cf..05dfa7ee 100644 --- a/sdk_v2/cpp/src/catalog/base_model_catalog.cc +++ b/sdk_v2/cpp/src/catalog/base_model_catalog.cc @@ -2,6 +2,8 @@ // Licensed under the MIT License. #include "catalog/base_model_catalog.h" +#include "model_command_router.h" + #include #include @@ -9,6 +11,7 @@ #include #include #include +#include namespace fl { @@ -88,8 +91,8 @@ bool CompareModelsForSort(const Model& m1, const Model& m2) { } // anonymous namespace -BaseModelCatalog::BaseModelCatalog(std::string name, ILogger& logger) - : name_(std::move(name)), logger_(logger) {} +BaseModelCatalog::BaseModelCatalog(std::string name, ModelCommandRouter& router, ILogger& logger) + : name_(std::move(name)), router_(router), logger_(logger) {} BaseModelCatalog::~BaseModelCatalog() = default; void BaseModelCatalog::PopulateModels(std::vector variants) const { @@ -318,10 +321,17 @@ std::vector BaseModelCatalog::GetCachedModels() const { std::vector BaseModelCatalog::GetLoadedModels() const { EnsurePopulated(); + // Single batch call instead of N per-model IsLoaded() round-trips. This is also correct in + // external mode, where the router fetches the loaded-id set from the remote service in one + // request; the old per-model path always returned empty there. + auto loaded = router_.ListLoadedModelIds(); + std::unordered_set loaded_set(loaded.begin(), loaded.end()); + std::lock_guard lock(mutex_); std::vector result; for (auto& m : models_) { - if (m->IsLoaded()) { + // Container Id() delegates to the selected variant, preserving the prior semantics. + if (loaded_set.count(m->Id()) > 0) { result.push_back(m.get()); } } diff --git a/sdk_v2/cpp/src/catalog/base_model_catalog.h b/sdk_v2/cpp/src/catalog/base_model_catalog.h index ad74d2f8..281b582f 100644 --- a/sdk_v2/cpp/src/catalog/base_model_catalog.h +++ b/sdk_v2/cpp/src/catalog/base_model_catalog.h @@ -14,6 +14,8 @@ namespace fl { +class ModelCommandRouter; + /// BaseModelCatalog: holds model collection, indices, and lookup logic. /// Derived classes implement FetchModels() to provide models from their specific source. /// The base class owns caching, indexing, and thread-safe refresh. @@ -26,7 +28,7 @@ namespace fl { /// Maps to C# BaseModelCatalog. class BaseModelCatalog : public ICatalog { public: - BaseModelCatalog(std::string name, ILogger& logger); + BaseModelCatalog(std::string name, ModelCommandRouter& router, ILogger& logger); ~BaseModelCatalog() override; const std::string& GetName() const override { return name_; } @@ -87,6 +89,7 @@ class BaseModelCatalog : public ICatalog { void EnsurePopulated(bool allow_refresh = false) const; std::string name_; + ModelCommandRouter& router_; ILogger& logger_; }; diff --git a/sdk_v2/cpp/src/download/model_registry_client.cc b/sdk_v2/cpp/src/download/model_registry_client.cc index 334b7da7..7adcbcd6 100644 --- a/sdk_v2/cpp/src/download/model_registry_client.cc +++ b/sdk_v2/cpp/src/download/model_registry_client.cc @@ -21,24 +21,6 @@ std::string BuildBaseUrl(const std::string& region) { return "https://" + region + ".api.azureml.ms/modelregistry/v1.0/registry/models/nonazureaccount?assetId="; } -/// URL-encode a string (percent-encoding). -std::string UrlEncode(const std::string& value) { - std::string result; - result.reserve(value.size() * 2); - for (unsigned char c : value) { - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || - c == '-' || c == '_' || c == '.' || c == '~') { - result += static_cast(c); - } else { - static const char hex[] = "0123456789ABCDEF"; - result += '%'; - result += hex[c >> 4]; - result += hex[c & 0x0F]; - } - } - return result; -} - constexpr const char* kUserAgent = "AzureAiStudio"; } // anonymous namespace diff --git a/sdk_v2/cpp/src/inferencing/model_load_manager.cc b/sdk_v2/cpp/src/inferencing/model_load_manager.cc index 0bc321b9..82130bfe 100644 --- a/sdk_v2/cpp/src/inferencing/model_load_manager.cc +++ b/sdk_v2/cpp/src/inferencing/model_load_manager.cc @@ -270,4 +270,16 @@ GenAIModelInstance* ModelLoadManager::GetLoadedModel(std::string_view model_id) return nullptr; } +std::vector ModelLoadManager::GetLoadedModelIds() const { + std::lock_guard lock(mutex_); + + std::vector ids; + ids.reserve(loaded_models_.size()); + for (const auto& [id, instance] : loaded_models_) { + ids.push_back(id); + } + + return ids; +} + } // namespace fl diff --git a/sdk_v2/cpp/src/inferencing/model_load_manager.h b/sdk_v2/cpp/src/inferencing/model_load_manager.h index f582217a..a560a75a 100644 --- a/sdk_v2/cpp/src/inferencing/model_load_manager.h +++ b/sdk_v2/cpp/src/inferencing/model_load_manager.h @@ -67,6 +67,10 @@ class ModelLoadManager { /// The returned pointer is valid until UnloadModel is called for this model_id. GenAIModelInstance* GetLoadedModel(std::string_view model_id); + /// Get the ids of all currently-loaded models (the keys of the loaded map). + /// Snapshot taken under the lock; the result is a copy, safe to use after the lock is released. + std::vector GetLoadedModelIds() const; + /// Reject all future LoadModel calls. Called by Manager::Shutdown(). /// Idempotent and thread-safe. void RejectNewLoads(); diff --git a/sdk_v2/cpp/src/manager.cc b/sdk_v2/cpp/src/manager.cc index d0a4a15b..e6ff5be3 100644 --- a/sdk_v2/cpp/src/manager.cc +++ b/sdk_v2/cpp/src/manager.cc @@ -20,6 +20,7 @@ #include "exception.h" #include "inferencing/model_load_manager.h" #include "inferencing/session/session_manager.h" +#include "model_command_router.h" #include "spdlog_logger.h" #include "telemetry/telemetry_action_tracker.h" #include "telemetry/telemetry_logger.h" @@ -294,6 +295,10 @@ Manager::Manager(const Configuration& config) *logger_, disable_region_fallback); model_load_manager_ = std::make_unique(*ep_detector_, *logger_); + // The router owns the local-vs-external decision for load/unload/list. Constructed before the + // catalog (which holds it by reference) and after the load manager it delegates to locally. + model_command_router_ = std::make_unique( + config_.external_service_url, *model_load_manager_, config_.app_name, *logger_); session_manager_ = std::make_unique(*logger_); telemetry_ = std::make_unique(config_.app_name, *logger_); catalog_ = std::make_unique( @@ -302,6 +307,7 @@ Manager::Manager(const Configuration& config) [this](ModelInfo info, std::string local_path) { return CreateModel(std::move(info), std::move(local_path)); }, + *model_command_router_, *ep_detector_, *logger_, config_.external_service_url.has_value(), config_.catalog_region.value_or("auto"), @@ -328,6 +334,8 @@ Manager::~Manager() { web_service_.reset(); #endif session_manager_.reset(); + // Router references model_load_manager_, so it must be torn down first. + model_command_router_.reset(); model_load_manager_.reset(); download_manager_.reset(); catalog_.reset(); @@ -428,7 +436,7 @@ void Manager::StartWebService() { #ifdef FOUNDRY_LOCAL_HAS_WEB_SERVICE web_service_ = std::make_unique(*catalog_, *logger_, *config_.model_cache_dir, *model_load_manager_, *session_manager_, *telemetry_, - [this]() { Shutdown(); }); + [this]() { RequestShutdown(); }); auto endpoints = config_.web_service_endpoints; if (endpoints.empty()) { @@ -474,12 +482,14 @@ void Manager::StopWebService() { } void Manager::Shutdown() { + shutdown_requested_.store(true); + bool expected = false; - if (!shutdown_requested_.compare_exchange_strong(expected, true)) { - return; // already shutting down + if (!teardown_started_.compare_exchange_strong(expected, true)) { + return; // teardown already ran } - logger_->Log(LogLevel::Information, "Shutdown requested"); + logger_->Log(LogLevel::Information, "Shutting down"); if (web_service_running_) { StopWebService(); @@ -497,6 +507,13 @@ void Manager::Shutdown() { model_load_manager_->UnloadAll(); } +void Manager::RequestShutdown() { + // Only raise the flag. The host watching IsShutdownRequested() performs the actual teardown from + // its own thread — doing it here would self-deadlock when we're on a web service worker thread, + // because StopWebService() joins every worker thread (including this one). + shutdown_requested_.store(true); +} + bool Manager::IsShutdownRequested() const { return shutdown_requested_.load(); } @@ -509,7 +526,7 @@ Model Manager::CreateModel(ModelInfo info, std::string local_path) { return Model::FromModelInfo(std::move(info), std::move(local_path), *download_manager_, - *model_load_manager_); + *model_command_router_); } DownloadManager& Manager::GetDownloadManager() { diff --git a/sdk_v2/cpp/src/manager.h b/sdk_v2/cpp/src/manager.h index 4b5440db..b98400e3 100644 --- a/sdk_v2/cpp/src/manager.h +++ b/sdk_v2/cpp/src/manager.h @@ -29,6 +29,7 @@ class DownloadManager; class ITelemetry; class Model; class ModelLoadManager; +class ModelCommandRouter; class SessionManager; struct ModelInfo; @@ -86,12 +87,12 @@ class Manager { /// Stop the embedded web service. void StopWebService(); - /// Begin graceful shutdown. Sets the shutdown flag and signals subsystems to drain. - /// Idempotent and non-blocking — safe to call from any thread including web service handlers. - /// Destroy() calls this internally, so direct SDK users don't need to call it explicitly. + /// Tear down all subsystems: stop the web service, cancel and drain sessions, and unload models. + /// Idempotent — the teardown body runs only once. Destroy()/~Manager() call this internally, so + /// direct SDK users don't need to call it explicitly. void Shutdown(); - /// Check if Shutdown() has been called (from any source — web endpoint, signal, user code). + /// Check if shutdown has been requested (from any source — web endpoint, signal, user code). bool IsShutdownRequested() const; /// Get the logger instance. @@ -126,8 +127,12 @@ class Manager { // catalog_ — owns all Model instances. used by download_manager, model_load_manager, and web service // download_manager_ — uses ModelInfo owned by catalog // model_load_manager_ — holds loaded model state referencing catalog models + // model_command_router_ — routes load/unload/list to model_load_manager_ (local) or a + // remote service; destroyed before model_load_manager_, which it + // references // session_manager_ — tracks all active sessions. destroyed after web service, before models // shutdown_requested_ — atomic flag checked by subsystems and the host process + // teardown_started_ — guards Shutdown()'s teardown body so it runs exactly once // web service members — use catalog, model_load_manager, session_manager, telemetry, logger // Configuration config_; @@ -140,8 +145,10 @@ class Manager { std::unique_ptr catalog_; std::unique_ptr download_manager_; std::unique_ptr model_load_manager_; + std::unique_ptr model_command_router_; std::unique_ptr session_manager_; std::atomic shutdown_requested_{false}; + std::atomic teardown_started_{false}; std::atomic web_service_running_{false}; std::vector bound_urls_; @@ -152,6 +159,11 @@ class Manager { private: Model CreateModel(ModelInfo info, std::string local_path); + /// Raise the shutdown-requested flag without tearing anything down. The web service's /shutdown + /// endpoint calls this so a host blocked serving requests can observe IsShutdownRequested() and + /// wind down on its own thread (where calling Shutdown() is safe). Safe to call from any thread. + void RequestShutdown(); + static std::mutex s_mutex_; static std::unique_ptr s_instance_; }; diff --git a/sdk_v2/cpp/src/model.cc b/sdk_v2/cpp/src/model.cc index d7f2b0c7..4a28a29c 100644 --- a/sdk_v2/cpp/src/model.cc +++ b/sdk_v2/cpp/src/model.cc @@ -4,9 +4,9 @@ #include "download/download_manager.h" #include "exception.h" -#include "inferencing/model_load_manager.h" #include "items/item.h" #include "items/text_item.h" +#include "model_command_router.h" #include "utils.h" #include @@ -25,12 +25,12 @@ Model::Model(Model&& other) noexcept cached_(other.cached_.load()), local_path_(std::move(other.local_path_)), download_manager_(other.download_manager_), - model_load_manager_(other.model_load_manager_), + router_(other.router_), variants_(std::move(other.variants_)), selected_variant_(other.selected_variant_) { // After vector move, selected_variant_ still points into the transferred buffer. other.download_manager_ = nullptr; - other.model_load_manager_ = nullptr; + other.router_ = nullptr; other.selected_variant_ = nullptr; } @@ -40,11 +40,11 @@ Model& Model::operator=(Model&& other) noexcept { cached_.store(other.cached_.load()); local_path_ = std::move(other.local_path_); download_manager_ = other.download_manager_; - model_load_manager_ = other.model_load_manager_; + router_ = other.router_; variants_ = std::move(other.variants_); selected_variant_ = other.selected_variant_; other.download_manager_ = nullptr; - other.model_load_manager_ = nullptr; + other.router_ = nullptr; other.selected_variant_ = nullptr; } @@ -58,11 +58,11 @@ Model& Model::operator=(Model&& other) noexcept { Model Model::FromModelInfo(ModelInfo info, std::string local_path, DownloadManager& download_manager, - ModelLoadManager& model_load_manager) { + ModelCommandRouter& router) { Model model; model.info_ = std::move(info); model.download_manager_ = &download_manager; - model.model_load_manager_ = &model_load_manager; + model.router_ = &router; if (!local_path.empty()) { model.cached_ = true; @@ -165,10 +165,10 @@ bool Model::IsLoaded() const { return selected_variant_->IsLoaded(); } - // ModelLoadManager owns the authoritative loaded-instance map. The pointer is set at - // construction and never reassigned, so querying it here stays in sync with paths that - // bypass Model::Load/Unload (e.g., Manager::Shutdown -> ModelLoadManager::UnloadAll). - return model_load_manager_->GetLoadedModel(info_.model_id) != nullptr; + // The router owns the local-vs-external decision. The pointer is set at construction and + // never reassigned, so querying it here stays in sync with paths that bypass + // Model::Load/Unload (e.g., Manager::Shutdown -> ModelLoadManager::UnloadAll). + return router_->IsLoaded(info_.model_id); } // --------------------------------------------------------------------------- @@ -211,13 +211,9 @@ void Model::Load(ExecutionProvider ep) { return; } - // LoadModel is idempotent — it returns kModelAlreadyLoaded if the id is already - // in the load manager's map, so no need for a local short-circuit. - auto result = model_load_manager_->LoadModel(local_path_, info_.model_id, ep); - - if (result.status == ModelLoadManager::LoadStatus::kModelNotFound) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "model not found at path: " + local_path_); - } + // The router decides local-vs-external; the model-not-found check lives there for the + // local branch, so callers see a uniform error regardless of mode. + router_->Load(info_.model_id, local_path_, ep); } void Model::Unload() { @@ -226,8 +222,8 @@ void Model::Unload() { return; } - // UnloadModel is idempotent — returns false if the id isn't loaded. - model_load_manager_->UnloadModel(info_.model_id); + // Idempotent in both modes — unloading an already-unloaded model is a no-op. + router_->Unload(info_.model_id); } void Model::RemoveFromCache() { diff --git a/sdk_v2/cpp/src/model.h b/sdk_v2/cpp/src/model.h index ee666b2f..abd13914 100644 --- a/sdk_v2/cpp/src/model.h +++ b/sdk_v2/cpp/src/model.h @@ -16,7 +16,7 @@ namespace fl { class DownloadManager; struct Item; -class ModelLoadManager; +class ModelCommandRouter; // ----------------------------------------------------------------------- // Model — a single model entry or a multi-variant container. @@ -43,13 +43,13 @@ class Model { /// Create a leaf Model from a ModelInfo (used when populating the catalog). /// If local_path is non-empty the model is marked as cached at that location. - /// The managers are non-owning bindings used by Download/Load/Unload. In production + /// The bindings are non-owning and used by Download/Load/Unload. In production /// Manager owns both via unique_ptr. Tests can use `fl::test::FakeServiceBindings` /// (test_helpers.h) for a one-line construction with cheap fakes. static Model FromModelInfo(ModelInfo info, std::string local_path, DownloadManager& download_manager, - ModelLoadManager& model_load_manager); + ModelCommandRouter& router); // --- Container construction --- @@ -139,9 +139,9 @@ class Model { private: // Leaf data (default/empty for containers). // cached_ is atomic — flipped concurrently by the download path. - // Loaded state is NOT stored here; it is queried from ModelLoadManager so the load - // manager remains the single source of truth (Manager::Shutdown clears its map without - // having to walk every Model and reset a local flag). + // Loaded state is NOT stored here; it is queried through the router (ModelLoadManager in + // local mode) so the load manager remains the single source of truth (Manager::Shutdown + // clears its map without having to walk every Model and reset a local flag). // local_path_ is set once during Download() (or at construction for already-cached models) // and cleared by RemoveFromCache(). It is intentionally NOT mutex-protected: concurrent // mutation alongside reads on the same Model* is not a supported pattern. @@ -152,7 +152,7 @@ class Model { // Non-owning service bindings for leaf operations. Set once at construction and never // reassigned; guaranteed non-null because FromModelInfo takes them by reference. DownloadManager* download_manager_ = nullptr; - ModelLoadManager* model_load_manager_ = nullptr; + ModelCommandRouter* router_ = nullptr; // Container data (empty/null for leaves). std::vector variants_; diff --git a/sdk_v2/cpp/src/model_command_router.cc b/sdk_v2/cpp/src/model_command_router.cc new file mode 100644 index 00000000..4b300758 --- /dev/null +++ b/sdk_v2/cpp/src/model_command_router.cc @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "model_command_router.h" + +#include "exception.h" +#include "http/http_client.h" +#include "inferencing/model_load_manager.h" +#include "logger.h" +#include "utils.h" + +#include +#include + +#include + +namespace fl { + +namespace { + +// Model load can be slow (cold cache, large weights); use a deliberate, generous client-side +// timeout instead of the v1 bindings' inconsistent/accidental values. List and unload are +// quick metadata operations and use the default request timeout. +constexpr std::chrono::minutes kLoadTimeout{5}; +constexpr std::chrono::seconds kDefaultTimeout{30}; + +} // namespace + +ModelCommandRouter::ModelCommandRouter(std::optional external_service_url, + ModelLoadManager& load_manager, + std::string app_name, + ILogger& logger) + : external_service_url_(std::move(external_service_url)), + load_manager_(load_manager), + user_agent_("foundry-local-sdk/" + app_name), + logger_(logger) {} + +std::string ModelCommandRouter::BaseUrl() const { + std::string base = *external_service_url_; + + // The configured URL may or may not carry a trailing '/'; normalise so the appended + // "/models/..." segment never produces a doubled slash. + if (!base.empty() && base.back() == '/') { + base.pop_back(); + } + + return base; +} + +std::string ModelCommandRouter::ExternalGet(const std::string& context, const std::string& url, + std::chrono::milliseconds timeout) const { + http::HttpRequestOptions options; + options.user_agent = user_agent_; + options.timeout = timeout; + + // Trace the outbound call so external-mode routing is observable; local calls are not logged. + logger_.Log(LogLevel::Debug, context + " -> GET " + url); + + auto response = http::HttpGetWithResponse(url, options); + + // status == 0 is a transport failure; any non-2xx is a server-side failure. Both are fatal. + if (response.status < 200 || response.status >= 300) { + FL_THROW(FOUNDRY_LOCAL_ERROR_NETWORK, + context + " via " + *external_service_url_ + ": " + http::DescribeFailure(response)); + } + + return response.body; +} + +void ModelCommandRouter::Load(std::string_view model_id, std::string_view local_path, ExecutionProvider ep) { + if (!external_service_url_.has_value()) { + // LoadModel is idempotent — kModelAlreadyLoaded if the id is already in the map. + auto result = load_manager_.LoadModel(local_path, model_id, ep); + + if (result.status == ModelLoadManager::LoadStatus::kModelNotFound) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "model not found at path: " + std::string(local_path)); + } + + return; + } + + // In external mode the remote service loads by id alone; local_path and ep are intentionally + // not forwarded (the service resolves the path and execution provider itself). + ExternalGet("load " + std::string(model_id), + BaseUrl() + "/models/load/" + UrlEncode(model_id), + kLoadTimeout); +} + +void ModelCommandRouter::Unload(std::string_view model_id) { + if (!external_service_url_.has_value()) { + // UnloadModel is idempotent — returns false if the id isn't loaded; the bool is ignored. + load_manager_.UnloadModel(model_id); + return; + } + + ExternalGet("unload " + std::string(model_id), + BaseUrl() + "/models/unload/" + UrlEncode(model_id), + kDefaultTimeout); +} + +bool ModelCommandRouter::IsLoaded(std::string_view model_id) { + if (!external_service_url_.has_value()) { + return load_manager_.GetLoadedModel(model_id) != nullptr; + } + + auto ids = ListLoadedModelIds(); + return std::find(ids.begin(), ids.end(), model_id) != ids.end(); +} + +std::vector ModelCommandRouter::ListLoadedModelIds() { + if (!external_service_url_.has_value()) { + return load_manager_.GetLoadedModelIds(); + } + + auto body = ExternalGet("list loaded models", BaseUrl() + "/models/loaded", kDefaultTimeout); + + // Empty / whitespace-only body means "no models loaded" — not a parse error. + auto first_non_ws = body.find_first_not_of(" \t\r\n"); + if (first_non_ws == std::string::npos) { + return {}; + } + + // Surface a malformed/non-array/non-string remote response as FOUNDRY_LOCAL_ERROR_INTERNAL + // as that means we're returning an invalid response from the SDK's web service (presumably). + nlohmann::json parsed; + + try { + parsed = nlohmann::json::parse(body); + } catch (const nlohmann::json::exception& e) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "list loaded models via " + *external_service_url_ + ": malformed response body: " + e.what()); + } + + if (!parsed.is_array()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "list loaded models via " + *external_service_url_ + ": expected a JSON array"); + } + + std::vector ids; + + for (const auto& element : parsed) { + if (!element.is_string()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "list loaded models via " + *external_service_url_ + ": array element is not a string"); + } + + ids.push_back(element.get()); + } + + return ids; +} + +} // namespace fl diff --git a/sdk_v2/cpp/src/model_command_router.h b/sdk_v2/cpp/src/model_command_router.h new file mode 100644 index 00000000..a191996f --- /dev/null +++ b/sdk_v2/cpp/src/model_command_router.h @@ -0,0 +1,75 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include "inferencing/execution_provider.h" + +#include +#include +#include +#include +#include + +namespace fl { + +class ILogger; +class ModelLoadManager; + +// ----------------------------------------------------------------------- +// ModelCommandRouter — owns the local-vs-external decision for the three +// model management commands (list-loaded / load / unload) ONLY. +// +// When `external_service_url` is unset the router delegates to the in-process +// ModelLoadManager. When set, it redirects the command to that remote Foundry +// Local service over HTTP. The OpenAI-compatible inference endpoints are out of +// scope — callers reach those directly, never through this router. +// +// Keeping this decision in one façade lets `Model` and the catalog stay +// mode-agnostic, and keeps ModelLoadManager a pure local lifecycle owner. +// ----------------------------------------------------------------------- + +class ModelCommandRouter { + public: + /// `load_manager` and `logger` are non-owning references; the owner (Manager) guarantees + /// they outlive this router via declaration ordering. + ModelCommandRouter(std::optional external_service_url, + ModelLoadManager& load_manager, + std::string app_name, + ILogger& logger); + + ModelCommandRouter(const ModelCommandRouter&) = delete; + ModelCommandRouter& operator=(const ModelCommandRouter&) = delete; + + /// Local: ModelLoadManager::LoadModel(local_path, model_id, ep), throwing on kModelNotFound. + /// External: GET {url}/models/load/{url-encoded id} with a generous timeout. + void Load(std::string_view model_id, std::string_view local_path, ExecutionProvider ep); + + /// Local: ModelLoadManager::UnloadModel(model_id) (idempotent). + /// External: GET {url}/models/unload/{url-encoded id}. + void Unload(std::string_view model_id); + + /// Local: ModelLoadManager::GetLoadedModel(model_id) != nullptr. + /// External: membership test against ListLoadedModelIds(). Batch callers should use + /// ListLoadedModelIds() directly to avoid a round-trip per id. + bool IsLoaded(std::string_view model_id); + + /// Local: ModelLoadManager loaded-map keys. + /// External: GET {url}/models/loaded, parsed as a JSON array of ids (empty body → {}). + std::vector ListLoadedModelIds(); + + private: + /// Trailing-'/'-trimmed external base URL. Precondition: external_service_url_ has a value. + std::string BaseUrl() const; + + /// Issue an external GET, throwing FOUNDRY_LOCAL_ERROR_NETWORK on non-2xx / transport failure. + /// `context` is prefixed to the error message (e.g. "load phi-3"). + std::string ExternalGet(const std::string& context, const std::string& url, + std::chrono::milliseconds timeout) const; + + std::optional external_service_url_; + ModelLoadManager& load_manager_; + std::string user_agent_; + ILogger& logger_; +}; + +} // namespace fl diff --git a/sdk_v2/cpp/src/service/models_handlers.cc b/sdk_v2/cpp/src/service/models_handlers.cc index 93988d3b..4c019e6e 100644 --- a/sdk_v2/cpp/src/service/models_handlers.cc +++ b/sdk_v2/cpp/src/service/models_handlers.cc @@ -9,6 +9,7 @@ #include "service/handler_utils.h" #include "service/web_service.h" #include "telemetry/telemetry_action_tracker.h" +#include "utils.h" #include // for FOUNDRY_LOCAL_MODEL_PROP_* constants @@ -16,6 +17,26 @@ namespace fl { +namespace { + +// Resolve a /models/{model} path segment to a model. The path segment is a model id or an alias — +// not a model "name" (a model's name is the id minus its ":version" suffix, i.e. id == name:version). +// The SDK's public Model API issues load / unload using the model_id (e.g. +// "phi-4-mini-instruct-generic-cpu:2"), while the CLI and legacy callers may pass an alias (e.g. +// "phi-4-mini-instruct"). Accept both: try the id index first, then the alias index. model_ids and +// aliases occupy disjoint namespaces, so there is no ambiguity. This keeps every /models/* operation +// id-addressable (matching GET /models/loaded, which reports model_ids) while preserving alias +// backward-compatibility. +Model* ResolveModelByIdOrAlias(ServiceContext& ctx, const std::string& model_id_or_alias) { + if (auto* variant = ctx.catalog.GetModelVariant(model_id_or_alias)) { + return variant; + } + + return ctx.catalog.GetModel(model_id_or_alias); +} + +} // namespace + // ======================================================================== // Handler: GET /models/loaded // ======================================================================== @@ -40,7 +61,7 @@ class ListLoadedModelsHandler : public HttpRequestHandler { }; // ======================================================================== -// Handler: GET /models/load/{name} +// Handler: GET /models/load/{model} // ======================================================================== class LoadModelHandler : public HttpRequestHandler { @@ -50,16 +71,19 @@ class LoadModelHandler : public HttpRequestHandler { std::shared_ptr handle(const std::shared_ptr& request) override { ActionTracker tracker(Action::kModelLoad, ctx_.telemetry); - auto name_raw = request->getPathVariable("name"); - if (!name_raw) { - return ErrorResponse(Status::CODE_400, "Missing model name"); + auto model_path_param = request->getPathVariable("model"); + if (!model_path_param) { + return ErrorResponse(Status::CODE_400, "Missing model id or alias"); } - std::string name = name_raw->c_str(); - auto* model = ctx_.catalog.GetModel(name); + // The model_id is percent-encoded on the wire by the SDK's ModelCommandRouter (e.g. ':' -> '%3A'), + // so decode before resolving. Without this every id-form value (the SDK's default) 404s. + std::string model_id_or_alias = UrlDecode(model_path_param->c_str()); + auto* model = ResolveModelByIdOrAlias(ctx_, model_id_or_alias); if (!model) { - return ErrorResponse(Status::CODE_404, "Model not found", "No model matching '" + name + "'"); + return ErrorResponse(Status::CODE_404, "Model not found", + "No model matching '" + model_id_or_alias + "'"); } if (model->IsLoaded()) { @@ -74,16 +98,18 @@ class LoadModelHandler : public HttpRequestHandler { try { model->Load(); - tracker.SetModelId(name); + tracker.SetModelId(model_id_or_alias); tracker.SetStatus(ActionStatus::kSuccess); - ctx_.logger.Log(LogLevel::Information, fmt::format("Model loaded via web service: {}", name)); + ctx_.logger.Log(LogLevel::Information, + fmt::format("Model loaded via web service: {}", model_id_or_alias)); return JsonResponse(Status::CODE_200, {{"status", "loaded"}}); } catch (const std::exception& ex) { - tracker.SetModelId(name); + tracker.SetModelId(model_id_or_alias); tracker.RecordException(ex); - ctx_.logger.Log(LogLevel::Error, fmt::format("Failed to load model {}: {}", name, ex.what())); + ctx_.logger.Log(LogLevel::Error, + fmt::format("Failed to load model {}: {}", model_id_or_alias, ex.what())); return ErrorResponse(Status::CODE_500, "Load failed", ex.what()); } } @@ -93,7 +119,7 @@ class LoadModelHandler : public HttpRequestHandler { }; // ======================================================================== -// Handler: GET /models/unload/{name} +// Handler: GET /models/unload/{model} // ======================================================================== class UnloadModelHandler : public HttpRequestHandler { @@ -103,16 +129,19 @@ class UnloadModelHandler : public HttpRequestHandler { std::shared_ptr handle(const std::shared_ptr& request) override { ActionTracker tracker(Action::kModelUnload, ctx_.telemetry); - auto name_raw = request->getPathVariable("name"); - if (!name_raw) { - return ErrorResponse(Status::CODE_400, "Missing model name"); + auto model_path_param = request->getPathVariable("model"); + if (!model_path_param) { + return ErrorResponse(Status::CODE_400, "Missing model id or alias"); } - std::string name = name_raw->c_str(); - auto* model = ctx_.catalog.GetModel(name); + // The model_id is percent-encoded on the wire by the SDK's ModelCommandRouter (e.g. ':' -> '%3A'), + // so decode before resolving. Without this every id-form value (the SDK's default) 404s. + std::string model_id_or_alias = UrlDecode(model_path_param->c_str()); + auto* model = ResolveModelByIdOrAlias(ctx_, model_id_or_alias); if (!model) { - return ErrorResponse(Status::CODE_404, "Model not found", "No model matching '" + name + "'"); + return ErrorResponse(Status::CODE_404, "Model not found", + "No model matching '" + model_id_or_alias + "'"); } if (!model->IsLoaded()) { @@ -123,16 +152,18 @@ class UnloadModelHandler : public HttpRequestHandler { try { model->Unload(); - tracker.SetModelId(name); + tracker.SetModelId(model_id_or_alias); tracker.SetStatus(ActionStatus::kSuccess); - ctx_.logger.Log(LogLevel::Information, fmt::format("Model unloaded via web service: {}", name)); + ctx_.logger.Log(LogLevel::Information, + fmt::format("Model unloaded via web service: {}", model_id_or_alias)); return JsonResponse(Status::CODE_200, {{"status", "unloaded"}}); } catch (const std::exception& ex) { - tracker.SetModelId(name); + tracker.SetModelId(model_id_or_alias); tracker.RecordException(ex); - ctx_.logger.Log(LogLevel::Error, fmt::format("Failed to unload model {}: {}", name, ex.what())); + ctx_.logger.Log(LogLevel::Error, + fmt::format("Failed to unload model {}: {}", model_id_or_alias, ex.what())); return ErrorResponse(Status::CODE_500, "Unload failed", ex.what()); } } @@ -195,7 +226,7 @@ class OpenAIListModelsHandler : public HttpRequestHandler { }; // ======================================================================== -// Handler: GET /v1/models/{name} — OpenAI-compatible model retrieve +// Handler: GET /v1/models/{model} — OpenAI-compatible model retrieve // ======================================================================== class OpenAIRetrieveModelHandler : public HttpRequestHandler { @@ -205,16 +236,17 @@ class OpenAIRetrieveModelHandler : public HttpRequestHandler { std::shared_ptr handle(const std::shared_ptr& request) override { ActionTracker tracker(Action::kOpenAIModelRetrieve, ctx_.telemetry); - auto name_raw = request->getPathVariable("name"); - if (!name_raw) { - return ErrorResponse(Status::CODE_400, "Missing model name"); + auto model_path_param = request->getPathVariable("model"); + if (!model_path_param) { + return ErrorResponse(Status::CODE_400, "Missing model id"); } - std::string name = name_raw->c_str(); - auto* model = ctx_.catalog.GetModelVariant(name); + // Percent-decode the id so encoded path segments (e.g. 'name%3A1') resolve like 'name:1'. + std::string model_id = UrlDecode(model_path_param->c_str()); + auto* model = ctx_.catalog.GetModelVariant(model_id); if (!model) { - return ErrorResponse(Status::CODE_404, "Model not found", "No model matching '" + name + "'"); + return ErrorResponse(Status::CODE_404, "Model not found", "No model matching '" + model_id + "'"); } const auto& info = model->Info(); diff --git a/sdk_v2/cpp/src/service/web_service.cc b/sdk_v2/cpp/src/service/web_service.cc index d2660f41..2323fb01 100644 --- a/sdk_v2/cpp/src/service/web_service.cc +++ b/sdk_v2/cpp/src/service/web_service.cc @@ -153,7 +153,7 @@ class StatusHandler : public HttpRequestHandler { }; // ======================================================================== -// Handler: POST /shutdown +// Handler: GET /shutdown // ======================================================================== class ShutdownHandler : public HttpRequestHandler { @@ -162,6 +162,10 @@ class ShutdownHandler : public HttpRequestHandler { : shutdown_fn_(std::move(shutdown_fn)) {} std::shared_ptr handle(const std::shared_ptr&) override { + // shutdown_fn_ is Manager::RequestShutdown: it only raises the shutdown flag and returns. The + // process hosting the web service watches IsShutdownRequested() and performs the actual teardown + // from its own thread. Tearing down here would self-deadlock — StopWebService() joins all worker + // threads, including this one. shutdown_fn_(); nlohmann::json body = {{"status", "shutting_down"}}; @@ -233,17 +237,17 @@ std::vector WebService::Start(const std::vector& endpo std::make_shared(ctx)); // Shutdown - impl_->router->route("POST", "/shutdown", + impl_->router->route("GET", "/shutdown", std::make_shared(impl_->shutdown_callback)); // Model management impl_->router->route("GET", "/models/loaded", CreateListLoadedModelsHandler(ctx)); - impl_->router->route("GET", "/models/load/{name}", CreateLoadModelHandler(ctx)); - impl_->router->route("GET", "/models/unload/{name}", CreateUnloadModelHandler(ctx)); + impl_->router->route("GET", "/models/load/{model}", CreateLoadModelHandler(ctx)); + impl_->router->route("GET", "/models/unload/{model}", CreateUnloadModelHandler(ctx)); // OpenAI-compatible endpoints impl_->router->route("GET", "/v1/models", CreateOpenAIListModelsHandler(ctx)); - impl_->router->route("GET", "/v1/models/{name}", CreateOpenAIRetrieveModelHandler(ctx)); + impl_->router->route("GET", "/v1/models/{model}", CreateOpenAIRetrieveModelHandler(ctx)); impl_->router->route("POST", "/v1/chat/completions", CreateChatCompletionsHandler(ctx)); impl_->router->route("POST", "/v1/audio/transcriptions", CreateAudioTranscriptionsHandler(ctx)); impl_->router->route("POST", "/v1/embeddings", CreateEmbeddingsHandler(ctx)); diff --git a/sdk_v2/cpp/src/utils.cc b/sdk_v2/cpp/src/utils.cc index 85ced656..5b00cdd3 100644 --- a/sdk_v2/cpp/src/utils.cc +++ b/sdk_v2/cpp/src/utils.cc @@ -38,6 +38,59 @@ std::string FormatLocationMessage(const CodeLocation& location, const std::strin } // namespace +std::string UrlEncode(std::string_view value) { + std::string result; + result.reserve(value.size() * 2); + + for (unsigned char c : value) { + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == '~') { + result += static_cast(c); + } else { + static const char hex[] = "0123456789ABCDEF"; + result += '%'; + result += hex[c >> 4]; + result += hex[c & 0x0F]; + } + } + + return result; +} + +std::string UrlDecode(std::string_view value) { + std::string result; + result.reserve(value.size()); + + auto hex_value = [](char c) -> int { + if (c >= '0' && c <= '9') { + return c - '0'; + } + if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } + if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } + return -1; + }; + + for (size_t i = 0; i < value.size(); ++i) { + if (value[i] == '%' && i + 2 < value.size()) { + int hi = hex_value(value[i + 1]); + int lo = hex_value(value[i + 2]); + if (hi >= 0 && lo >= 0) { + result += static_cast((hi << 4) | lo); + i += 2; + continue; + } + } + + result += value[i]; + } + + return result; +} + #ifdef _WIN32 // Safe getenv wrapper for MSVC (avoids C4996). static std::optional SafeGetEnv(const char* name) { diff --git a/sdk_v2/cpp/src/utils.h b/sdk_v2/cpp/src/utils.h index 260203c9..6c722fef 100644 --- a/sdk_v2/cpp/src/utils.h +++ b/sdk_v2/cpp/src/utils.h @@ -9,12 +9,23 @@ #include #include +#include #include namespace fl { class ILogger; +/// Percent-encode a string for safe inclusion in a URL path segment or query value. +/// Unreserved characters (RFC 3986: A-Z a-z 0-9 - _ . ~) pass through; everything else +/// is encoded as %XX. Shared so callers building remote URLs don't reinvent it. +std::string UrlEncode(std::string_view value); + +/// Percent-decode a string from a URL path segment or query value (inverse of UrlEncode). +/// %XX escapes are decoded to their byte value; '+' is left as-is (path semantics, not form). Any +/// malformed trailing '%' is passed through unchanged. Used to recover model_ids from request paths. +std::string UrlDecode(std::string_view value); + struct Utils { /// Read an environment variable value. Returns nullopt when missing. /// Empty string is a valid value when the variable exists but has no content. diff --git a/sdk_v2/cpp/test/CMakeLists.txt b/sdk_v2/cpp/test/CMakeLists.txt index efa1bf1c..6f0ed95e 100644 --- a/sdk_v2/cpp/test/CMakeLists.txt +++ b/sdk_v2/cpp/test/CMakeLists.txt @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. find_package(httplib CONFIG REQUIRED) +find_package(reproc++ CONFIG REQUIRED) # ========================================================================== # Internal tests — test the implementation directly @@ -36,6 +37,7 @@ add_executable(foundry_local_tests internal_api/model_info_test.cc internal_api/model_info_accessors_test.cc internal_api/model_io_info_test.cc + internal_api/model_command_router_test.cc internal_api/model_load_manager_test.cc internal_api/model_sorting_test.cc internal_api/platform_path_test.cc @@ -250,3 +252,63 @@ gtest_discover_tests(cache_only_tests DISCOVERY_MODE PRE_TEST DISCOVERY_TIMEOUT 60 ) + +# ========================================================================== +# External-mode tests — client/server split via a separate web-service process. +# Separate binary because Manager is a singleton: the web-service host (non-cache-only) +# and the external-mode client (cache-only) cannot coexist in one process. The binary +# re-executes itself with `--serve` to host the service, then runs the suite in the +# parent process pointed at that host. Provides its own main() (no GTest::gtest_main). +# ========================================================================== +add_executable(external_mode_tests + sdk_api/external_mode_test.cc +) + +set_target_properties(external_mode_tests PROPERTIES CXX_STANDARD 17) +target_compile_options(external_mode_tests PRIVATE ${FOUNDRY_LOCAL_COMPILE_OPTIONS}) + +target_include_directories(external_mode_tests + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} +) + +target_link_libraries(external_mode_tests + PRIVATE + foundry_local_cpp + GTest::gtest + httplib::httplib + reproc++ +) + +add_dependencies(external_mode_tests foundry_local) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(external_mode_tests PRIVATE -Wl,--disable-new-dtags) +endif() + +if(APPLE) + set_target_properties(external_mode_tests PROPERTIES + BUILD_RPATH "${ORT_GENAI_LIB_DIR};${ORT_LIB_DIR}" + ) +endif() + +# Stage the tiny loadable test model next to the binary so the host process can serve it. +add_custom_command(TARGET external_mode_tests POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CMAKE_CURRENT_SOURCE_DIR}/testdata + $/testdata +) + +if(ANDROID) + target_compile_definitions(external_mode_tests PRIVATE FOUNDRY_LOCAL_TEST_DATA_DIR="testdata") +else() + target_compile_definitions(external_mode_tests PRIVATE + FOUNDRY_LOCAL_TEST_DATA_DIR="$/testdata" + ) +endif() + +gtest_discover_tests(external_mode_tests + WORKING_DIRECTORY $ + DISCOVERY_MODE PRE_TEST + DISCOVERY_TIMEOUT 60 +) diff --git a/sdk_v2/cpp/test/internal_api/audio/audio_session_test.cc b/sdk_v2/cpp/test/internal_api/audio/audio_session_test.cc index f6f7b447..cbc00a35 100644 --- a/sdk_v2/cpp/test/internal_api/audio/audio_session_test.cc +++ b/sdk_v2/cpp/test/internal_api/audio/audio_session_test.cc @@ -106,7 +106,7 @@ class AudioSessionTest : public ::testing::Test { static inline GenAIModelInstance* model_ = nullptr; static inline fl::test::FakeServiceBindings svc_; static inline Model catalog_model_ = Model::FromModelInfo( - ModelInfo{}, "", svc_.download_manager, svc_.model_load_manager); + ModelInfo{}, "", svc_.download_manager, svc_.router); fl::test::NullTelemetry null_telemetry_; fl::test::NullSessionManager null_session_manager_; }; @@ -158,7 +158,7 @@ class AudioSessionInferenceTest : public ::testing::Test { static inline GenAIModelInstance* model_ = nullptr; static inline fl::test::FakeServiceBindings svc_; static inline Model catalog_model_ = Model::FromModelInfo( - ModelInfo{}, "", svc_.download_manager, svc_.model_load_manager); + ModelInfo{}, "", svc_.download_manager, svc_.router); fl::test::NullTelemetry null_telemetry_; fl::test::NullSessionManager null_session_manager_; }; diff --git a/sdk_v2/cpp/test/internal_api/base_model_catalog_test.cc b/sdk_v2/cpp/test/internal_api/base_model_catalog_test.cc index 8012d014..207f44fa 100644 --- a/sdk_v2/cpp/test/internal_api/base_model_catalog_test.cc +++ b/sdk_v2/cpp/test/internal_api/base_model_catalog_test.cc @@ -27,7 +27,7 @@ using namespace fl; class TestCatalog : public BaseModelCatalog { public: - explicit TestCatalog(ILogger& logger) : BaseModelCatalog("test-catalog", logger) {} + explicit TestCatalog(ILogger& logger) : BaseModelCatalog("test-catalog", fl::test::NullRouter(), logger) {} void AddModel(Model model) { models_.push_back(std::move(model)); @@ -53,7 +53,7 @@ static Model MakeModel(const std::string& model_id, const std::string& name, info.version = version; info.alias = alias; return Model::FromModelInfo(std::move(info), local_path, - svc.download_manager, svc.model_load_manager); + svc.download_manager, svc.router); } // ======================================================================== @@ -209,7 +209,7 @@ TEST_F(BaseModelCatalogTest, GetModel_SkipsInvalidEntries) { TestCatalog catalog(logger_); fl::test::FakeServiceBindings svc; catalog.AddModel(Model::FromModelInfo(invalid_info, "", - svc.download_manager, svc.model_load_manager)); + svc.download_manager, svc.router)); catalog.AddModel(MakeModel("good:1", "good", 1, "good-alias")); // Invalid model is skipped during grouping — only the valid model is listed diff --git a/sdk_v2/cpp/test/internal_api/chat/chat_session_test.cc b/sdk_v2/cpp/test/internal_api/chat/chat_session_test.cc index 891aa9d6..e529463d 100644 --- a/sdk_v2/cpp/test/internal_api/chat/chat_session_test.cc +++ b/sdk_v2/cpp/test/internal_api/chat/chat_session_test.cc @@ -67,7 +67,7 @@ class ChatSessionTest : public ::testing::Test { static inline GenAIModelInstance* model_ = nullptr; static inline fl::test::FakeServiceBindings svc_; static inline Model catalog_model_ = Model::FromModelInfo( - ModelInfo{}, "", svc_.download_manager, svc_.model_load_manager); + ModelInfo{}, "", svc_.download_manager, svc_.router); fl::test::NullTelemetry null_telemetry_; fl::test::NullSessionManager null_session_manager_; }; diff --git a/sdk_v2/cpp/test/internal_api/model_command_router_test.cc b/sdk_v2/cpp/test/internal_api/model_command_router_test.cc new file mode 100644 index 00000000..d85933b9 --- /dev/null +++ b/sdk_v2/cpp/test/internal_api/model_command_router_test.cc @@ -0,0 +1,449 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// Tests for fl::ModelCommandRouter — the façade that owns the local-vs-external +// decision for the three model-management commands (list-loaded / load / unload). +// +// Coverage map: +// * Local branch — wraps a real ModelLoadManager with external_service_url unset. +// Exercised with the cheap, model-free idempotent/empty cases. +// * External branch — pointed at a tiny in-process httplib stub server returning +// canned bodies. Covers base-URL trailing-slash trimming, the +// JSON-array parse (empty/whitespace → {}, ["a","b"] → {a,b}), +// url-encoding of the id path segment, membership-based IsLoaded, +// and FOUNDRY_LOCAL_ERROR_NETWORK on non-2xx / transport failure. +// * Real web service round-trip (guarded by FOUNDRY_LOCAL_HAS_WEB_SERVICE) — points +// an external-mode router at a real Foundry Local WebService and +// verifies load → list → IsLoaded → unload, mirroring v1's +// TestModelLoadManagerExternalService. Also asserts the +// BaseModelCatalog::GetLoadedModels() external-mode regression fix. + +#include "model_command_router.h" + +#include "catalog/base_model_catalog.h" +#include "ep_detection/ep_detector.h" +#include "exception.h" +#include "inferencing/execution_provider.h" +#include "inferencing/model_load_manager.h" +#include "internal_api/test_helpers.h" +#include "logger.h" +#include "model.h" +#include "model_info.h" + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace fl; + +namespace { + +// ======================================================================== +// Local-branch tests — no server, no model. These exercise the cheap +// idempotent/empty paths that the design promises without loading anything. +// ======================================================================== + +class RouterLocalTest : public ::testing::Test { + protected: + test::CpuOnlyEpDetector ep_; + StderrLogger logger_; + ModelLoadManager load_manager_{ep_, logger_}; + ModelCommandRouter router_{/*external_service_url=*/std::nullopt, load_manager_, "test", logger_}; +}; + +TEST_F(RouterLocalTest, ListLoadedModelIds_EmptyWhenNothingLoaded) { + EXPECT_TRUE(router_.ListLoadedModelIds().empty()); +} + +TEST_F(RouterLocalTest, IsLoaded_FalseWhenNotLoaded) { + EXPECT_FALSE(router_.IsLoaded("never-loaded:1")); +} + +TEST_F(RouterLocalTest, Unload_UnloadedId_IsIdempotentNoThrow) { + // UnloadModel returns false for an unknown id; the router must swallow that and not throw. + EXPECT_NO_THROW(router_.Unload("never-loaded:1")); +} + +// ======================================================================== +// External-branch tests — a tiny in-process httplib stub server returns +// canned bodies so the HTTP path is exercised deterministically without a +// real model or the full web service. +// ======================================================================== + +// Minimal canned-response HTTP server. Records every requested path so tests can +// assert URL shape (trailing-slash trimming, url-encoded id segment). +class StubServer { + public: + StubServer() { + server_.Get("/.*", [this](const httplib::Request& req, httplib::Response& res) { + { + std::lock_guard lock(mutex_); + // Record the raw, undecoded request-target (not req.path, which httplib percent-decodes) + // so url-encoding assertions can see the wire bytes the router actually sent. + requested_paths_.push_back(req.target); + } + + res.status = response_status_; + res.set_content(response_body_, "application/json"); + }); + + // bind_to_any_port() binds to an OS-assigned ephemeral port and returns it. (bind_to_port() + // returns a bool, not the port — using it here would silently yield port 1.) + port_ = server_.bind_to_any_port("127.0.0.1"); + + listener_ = std::thread([this]() { server_.listen_after_bind(); }); + + // The socket is already in listen() state after bind, but wait until the accept loop is + // actually spun up so the very first request can't race the thread start. + while (!server_.is_running()) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + ~StubServer() { + server_.stop(); + if (listener_.joinable()) { + listener_.join(); + } + } + + StubServer(const StubServer&) = delete; + StubServer& operator=(const StubServer&) = delete; + + std::string BaseUrl() const { return "http://127.0.0.1:" + std::to_string(port_); } + + void SetResponse(int status, std::string body) { + response_status_ = status; + response_body_ = std::move(body); + } + + std::vector RequestedPaths() const { + std::lock_guard lock(mutex_); + return requested_paths_; + } + + private: + httplib::Server server_; + int port_ = 0; + std::thread listener_; + + std::atomic response_status_{200}; + std::string response_body_{"[]"}; + + mutable std::mutex mutex_; + std::vector requested_paths_; +}; + +class RouterExternalTest : public ::testing::Test { + protected: + StubServer server_; + test::CpuOnlyEpDetector ep_; + StderrLogger logger_; + ModelLoadManager load_manager_{ep_, logger_}; +}; + +TEST_F(RouterExternalTest, ListLoadedModelIds_ParsesJsonArray) { + server_.SetResponse(200, R"(["alpha:1","beta:2"])"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + auto ids = router.ListLoadedModelIds(); + + ASSERT_EQ(ids.size(), 2u); + EXPECT_EQ(ids[0], "alpha:1"); + EXPECT_EQ(ids[1], "beta:2"); + + auto paths = server_.RequestedPaths(); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], "/models/loaded"); +} + +TEST_F(RouterExternalTest, ListLoadedModelIds_EmptyBodyYieldsEmptyVector) { + server_.SetResponse(200, ""); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + EXPECT_TRUE(router.ListLoadedModelIds().empty()); +} + +TEST_F(RouterExternalTest, ListLoadedModelIds_WhitespaceBodyYieldsEmptyVector) { + server_.SetResponse(200, " \r\n\t "); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + EXPECT_TRUE(router.ListLoadedModelIds().empty()); +} + +TEST_F(RouterExternalTest, ListLoadedModelIds_EmptyJsonArrayYieldsEmptyVector) { + server_.SetResponse(200, "[]"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + EXPECT_TRUE(router.ListLoadedModelIds().empty()); +} + +TEST_F(RouterExternalTest, BaseUrl_TrailingSlashIsTrimmed) { + server_.SetResponse(200, "[]"); + // The configured URL carries a trailing '/'; the appended "/models/loaded" must not double up. + ModelCommandRouter router(server_.BaseUrl() + "/", load_manager_, "test", logger_); + + router.ListLoadedModelIds(); + + auto paths = server_.RequestedPaths(); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], "/models/loaded") << "Expected no doubled slash from the trailing-'/' base URL"; +} + +TEST_F(RouterExternalTest, IsLoaded_TrueWhenIdPresentInLoadedSet) { + server_.SetResponse(200, R"(["phi:1","qwen:3"])"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + EXPECT_TRUE(router.IsLoaded("qwen:3")); +} + +TEST_F(RouterExternalTest, IsLoaded_FalseWhenIdAbsentFromLoadedSet) { + server_.SetResponse(200, R"(["phi:1","qwen:3"])"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + EXPECT_FALSE(router.IsLoaded("missing:9")); +} + +TEST_F(RouterExternalTest, Load_UrlEncodesIdInPathSegment) { + server_.SetResponse(200, R"({"status":"loaded"})"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + // ':' is reserved and must be percent-encoded to %3A in the path segment. + router.Load("phi-4:1", "/ignored/in/external/mode", ExecutionProvider::kCPU); + + auto paths = server_.RequestedPaths(); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], "/models/load/phi-4%3A1"); +} + +TEST_F(RouterExternalTest, Unload_UrlEncodesIdInPathSegment) { + server_.SetResponse(200, R"({"status":"unloaded"})"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + router.Unload("phi-4:1"); + + auto paths = server_.RequestedPaths(); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], "/models/unload/phi-4%3A1"); +} + +TEST_F(RouterExternalTest, Load_NonSuccessStatusThrowsNetwork) { + server_.SetResponse(500, R"({"error":"boom"})"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + try { + router.Load("phi-4:1", "/ignored", ExecutionProvider::kCPU); + FAIL() << "Expected fl::Exception on non-2xx response"; + } catch (const fl::Exception& e) { + EXPECT_EQ(e.code(), FOUNDRY_LOCAL_ERROR_NETWORK); + } +} + +TEST_F(RouterExternalTest, ListLoadedModelIds_NonSuccessStatusThrowsNetwork) { + server_.SetResponse(404, "not found"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + try { + router.ListLoadedModelIds(); + FAIL() << "Expected fl::Exception on non-2xx response"; + } catch (const fl::Exception& e) { + EXPECT_EQ(e.code(), FOUNDRY_LOCAL_ERROR_NETWORK); + } +} + +TEST_F(RouterExternalTest, Unload_NonSuccessStatusThrowsNetwork) { + server_.SetResponse(503, "unavailable"); + ModelCommandRouter router(server_.BaseUrl(), load_manager_, "test", logger_); + + try { + router.Unload("phi-4:1"); + FAIL() << "Expected fl::Exception on non-2xx response"; + } catch (const fl::Exception& e) { + EXPECT_EQ(e.code(), FOUNDRY_LOCAL_ERROR_NETWORK); + } +} + +TEST(RouterExternalTransportTest, TransportFailureThrowsNetwork) { + // Port 1 on loopback has no listener — the connection is refused, which the router must surface + // as FOUNDRY_LOCAL_ERROR_NETWORK (status == 0 transport failure), not crash or hang. + test::CpuOnlyEpDetector ep; + StderrLogger logger; + ModelLoadManager load_manager(ep, logger); + ModelCommandRouter router("http://127.0.0.1:1", load_manager, "test", logger); + + try { + router.ListLoadedModelIds(); + FAIL() << "Expected fl::Exception on transport failure"; + } catch (const fl::Exception& e) { + EXPECT_EQ(e.code(), FOUNDRY_LOCAL_ERROR_NETWORK); + } +} + +} // namespace + +// ======================================================================== +// Real-web-service round-trip — external-mode router pointed at a live +// Foundry Local WebService. Mirrors v1's TestModelLoadManagerExternalService. +// Compiled only when the web service is built into the binary. +// ======================================================================== + +#ifdef FOUNDRY_LOCAL_HAS_WEB_SERVICE + +#include "internal_api/web_service_test_helpers.h" +#include "service/web_service.h" +#include "inferencing/session/session_manager.h" +#include "null_telemetry.h" + +#include + +namespace { + +// BaseModelCatalog subclass used to verify the GetLoadedModels() external-mode regression: it is +// wired with the *external* router, so GetLoadedModels() must reflect models loaded over HTTP. +class ExternalRouterCatalog : public BaseModelCatalog { + public: + ExternalRouterCatalog(ModelCommandRouter& router, ILogger& logger) + : BaseModelCatalog("external-router-catalog", router, logger) {} + + void AddModel(Model model) { models_.push_back(std::move(model)); } + + protected: + std::vector FetchModels() const override { return std::move(models_); } + + private: + mutable std::vector models_; +}; + +// Starts a real WebService backed by a real ModelLoadManager and a catalog containing the tiny +// loadable test model. A separate, external-mode ModelCommandRouter is pointed at the bound URL so +// the tests drive the remote /models/* endpoints through the router under test. +class RouterWebServiceRoundTripTest : public ::testing::Test { + protected: + void SetUp() override { + loadable_path_ = test::GetTestDataModelPath(test::kLoadableTestModelAlias); + if (!std::filesystem::exists(loadable_path_)) { + GTEST_SKIP() << "Loadable test model not present at " << loadable_path_ + << "; skipping external-service round-trip."; + } + + logger_ = std::make_unique(); + ep_detector_ = std::make_unique(); + server_load_manager_ = std::make_unique(*ep_detector_, *logger_); + session_manager_ = std::make_unique(*logger_); + null_telemetry_ = std::make_unique(); + + // Local-mode router that the WebService catalog uses for its own bookkeeping. + server_router_ = std::make_unique(std::nullopt, *server_load_manager_, "test", *logger_); + + server_catalog_ = std::make_unique(); + server_catalog_->AddModel(Model::FromModelInfo( + test::MakeTestModelInfo(test::kLoadableTestModelAlias, "microsoft"), + loadable_path_, svc_.download_manager, *server_router_)); + + service_ = std::make_unique(*server_catalog_, *logger_, "/tmp/router-test-cache", + *server_load_manager_, *session_manager_, *null_telemetry_, []() {}); + auto urls = service_->Start({"http://127.0.0.1:0"}); + ASSERT_EQ(urls.size(), 1u); + base_url_ = urls[0]; + + // The router under test talks to the service over HTTP in external mode. It wraps a *second*, + // unused ModelLoadManager so the local branch is never accidentally taken. + client_load_manager_ = std::make_unique(*ep_detector_, *logger_); + external_router_ = std::make_unique(base_url_, *client_load_manager_, "test", *logger_); + } + + void TearDown() override { + if (service_) { + service_->Stop(); + } + external_router_.reset(); + client_load_manager_.reset(); + service_.reset(); + server_catalog_.reset(); + server_router_.reset(); + null_telemetry_.reset(); + session_manager_.reset(); + server_load_manager_.reset(); + ep_detector_.reset(); + logger_.reset(); + } + + // The id reported by the web service for the loadable alias, e.g. "tiny-random-gpt2-fp32-1:1". + std::string LoadedId() const { return std::string(test::kLoadableTestModelAlias) + ":1"; } + + std::string loadable_path_; + std::string base_url_; + std::unique_ptr logger_; + std::unique_ptr ep_detector_; + std::unique_ptr server_load_manager_; + std::unique_ptr client_load_manager_; + std::unique_ptr session_manager_; + std::unique_ptr null_telemetry_; + std::unique_ptr server_router_; + std::unique_ptr external_router_; + std::unique_ptr server_catalog_; + std::unique_ptr service_; + test::FakeServiceBindings svc_; +}; + +TEST_F(RouterWebServiceRoundTripTest, LoadListUnloadRoundTripThroughExternalRouter) { + // Nothing loaded initially. + EXPECT_TRUE(external_router_->ListLoadedModelIds().empty()); + EXPECT_FALSE(external_router_->IsLoaded(LoadedId())); + + // Load over the external route. local_path / ep are ignored in external mode; the web service + // resolves the alias against its own catalog and loads from disk. + external_router_->Load(test::kLoadableTestModelAlias, loadable_path_, ExecutionProvider::kCPU); + + { + auto ids = external_router_->ListLoadedModelIds(); + ASSERT_EQ(ids.size(), 1u) << "Expected exactly the loadable model to be reported as loaded"; + EXPECT_EQ(ids[0], LoadedId()); + } + + EXPECT_TRUE(external_router_->IsLoaded(LoadedId())); + + // Unload over the external route and confirm it is gone. + external_router_->Unload(test::kLoadableTestModelAlias); + + EXPECT_TRUE(external_router_->ListLoadedModelIds().empty()); + EXPECT_FALSE(external_router_->IsLoaded(LoadedId())); +} + +TEST_F(RouterWebServiceRoundTripTest, GetLoadedModelsReflectsExternalRouteLoads) { + // A BaseModelCatalog wired with the external router. Before the fix this path always returned + // empty in external mode because it issued per-model local IsLoaded() checks. + ExternalRouterCatalog catalog(*external_router_, *logger_); + catalog.AddModel(Model::FromModelInfo( + test::MakeTestModelInfo(test::kLoadableTestModelAlias, "microsoft"), + loadable_path_, svc_.download_manager, *external_router_)); + + EXPECT_TRUE(catalog.GetLoadedModels().empty()) << "Nothing loaded yet"; + + external_router_->Load(test::kLoadableTestModelAlias, loadable_path_, ExecutionProvider::kCPU); + + { + auto loaded = catalog.GetLoadedModels(); + ASSERT_EQ(loaded.size(), 1u) + << "GetLoadedModels() must reflect the model loaded over the external route"; + EXPECT_EQ(loaded[0]->Id(), LoadedId()); + } + + external_router_->Unload(test::kLoadableTestModelAlias); + + EXPECT_TRUE(catalog.GetLoadedModels().empty()) << "Model should be gone after external unload"; +} + +} // namespace + +#endif // FOUNDRY_LOCAL_HAS_WEB_SERVICE diff --git a/sdk_v2/cpp/test/internal_api/model_io_info_test.cc b/sdk_v2/cpp/test/internal_api/model_io_info_test.cc index 34fcfd42..0fff7a44 100644 --- a/sdk_v2/cpp/test/internal_api/model_io_info_test.cc +++ b/sdk_v2/cpp/test/internal_api/model_io_info_test.cc @@ -25,7 +25,7 @@ static Model MakeModelWithTask(const std::string& task) { info.alias = "test-alias"; info.task = task; return Model::FromModelInfo(std::move(info), "", - svc.download_manager, svc.model_load_manager); + svc.download_manager, svc.router); } // ======================================================================== diff --git a/sdk_v2/cpp/test/internal_api/model_sorting_test.cc b/sdk_v2/cpp/test/internal_api/model_sorting_test.cc index 73dfb583..d691dc67 100644 --- a/sdk_v2/cpp/test/internal_api/model_sorting_test.cc +++ b/sdk_v2/cpp/test/internal_api/model_sorting_test.cc @@ -29,7 +29,7 @@ using namespace fl; class SortTestCatalog : public BaseModelCatalog { public: - explicit SortTestCatalog(ILogger& logger) : BaseModelCatalog("sort-test-catalog", logger) {} + explicit SortTestCatalog(ILogger& logger) : BaseModelCatalog("sort-test-catalog", fl::test::NullRouter(), logger) {} void AddModel(Model model) { models_.push_back(std::move(model)); @@ -62,7 +62,7 @@ static Model MakeModel(const std::string& base_name, static fl::test::FakeServiceBindings svc; return Model::FromModelInfo(std::move(info), "", - svc.download_manager, svc.model_load_manager); + svc.download_manager, svc.router); } // ======================================================================== diff --git a/sdk_v2/cpp/test/internal_api/session_manager_test.cc b/sdk_v2/cpp/test/internal_api/session_manager_test.cc index 9e485781..e0e7ca56 100644 --- a/sdk_v2/cpp/test/internal_api/session_manager_test.cc +++ b/sdk_v2/cpp/test/internal_api/session_manager_test.cc @@ -83,7 +83,7 @@ class SessionManagerTest : public ::testing::Test { static inline GenAIModelInstance* model_ = nullptr; static inline fl::test::FakeServiceBindings svc_; static inline Model catalog_model_ = Model::FromModelInfo( - ModelInfo{}, "", svc_.download_manager, svc_.model_load_manager); + ModelInfo{}, "", svc_.download_manager, svc_.router); fl::test::NullTelemetry null_telemetry_; }; diff --git a/sdk_v2/cpp/test/internal_api/test_helpers.h b/sdk_v2/cpp/test/internal_api/test_helpers.h index 0e74751a..1139db7e 100644 --- a/sdk_v2/cpp/test/internal_api/test_helpers.h +++ b/sdk_v2/cpp/test/internal_api/test_helpers.h @@ -7,8 +7,10 @@ #include "ep_detection/ep_detector.h" #include "inferencing/model_load_manager.h" #include "logger.h" +#include "model_command_router.h" #include +#include #include #include @@ -40,12 +42,24 @@ inline ILogger& NullLog() { /// One-stop bag of cheap fakes for tests that need to construct a leaf `Model` via /// `FromModelInfo` but don't exercise Download/Load. Public fields by design — no invariants /// to protect, and the field names match the matching `FromModelInfo` parameter names so the -/// call site reads as `FromModelInfo(info, "", svc.download_manager, svc.model_load_manager)`. +/// call site reads as `FromModelInfo(info, "", svc.download_manager, svc.router)`. +/// +/// `router` wraps `model_load_manager` with no external URL, so its local branch delegates to +/// the bundled fake load manager — exactly what local-mode tests want. struct FakeServiceBindings { CpuOnlyEpDetector ep_detector; NullLogger logger; DownloadManager download_manager{/*cache_directory=*/"", /*catalog_region=*/"", /*max_concurrency=*/1, logger}; ModelLoadManager model_load_manager{ep_detector, logger}; + ModelCommandRouter router{/*external_service_url=*/std::nullopt, model_load_manager, /*app_name=*/"test", logger}; }; +/// Returns a process-wide `ModelCommandRouter` (local mode, no external URL) for tests that need +/// to satisfy a `ModelCommandRouter&` parameter — e.g. a `BaseModelCatalog` subclass ctor — but +/// don't exercise load/unload routing. +inline ModelCommandRouter& NullRouter() { + static FakeServiceBindings instance; + return instance.router; +} + } // namespace fl::test diff --git a/sdk_v2/cpp/test/internal_api/utils_test.cc b/sdk_v2/cpp/test/internal_api/utils_test.cc index b1a35469..06b3d4ad 100644 --- a/sdk_v2/cpp/test/internal_api/utils_test.cc +++ b/sdk_v2/cpp/test/internal_api/utils_test.cc @@ -179,3 +179,42 @@ TEST(UtilsTest, LogAndThrowWithLocationLogsAndThrowsFormattedMessage) { } EXPECT_TRUE(caught) << "expected fl::Exception"; } + +// ======================================================================== +// UrlEncode / UrlDecode tests +// ======================================================================== + +TEST(UrlEncodeTest, UnreservedCharactersPassThrough) { + EXPECT_EQ(UrlEncode("abcXYZ0189-_.~"), "abcXYZ0189-_.~"); +} + +TEST(UrlEncodeTest, EncodesModelIdColon) { + // ModelCommandRouter percent-encodes model_ids before placing them in a URL path. + EXPECT_EQ(UrlEncode("tiny-random-gpt2-fp32:1"), "tiny-random-gpt2-fp32%3A1"); +} + +TEST(UrlDecodeTest, DecodesPercentEscapes) { + EXPECT_EQ(UrlDecode("tiny-random-gpt2-fp32%3A1"), "tiny-random-gpt2-fp32:1"); +} + +TEST(UrlDecodeTest, IsCaseInsensitiveForHexDigits) { + EXPECT_EQ(UrlDecode("a%3ab"), "a:b"); + EXPECT_EQ(UrlDecode("a%3Ab"), "a:b"); +} + +TEST(UrlDecodeTest, PlainStringIsUnchanged) { + EXPECT_EQ(UrlDecode("phi-4-mini-instruct"), "phi-4-mini-instruct"); +} + +TEST(UrlDecodeTest, MalformedTrailingPercentIsPassedThrough) { + EXPECT_EQ(UrlDecode("abc%"), "abc%"); + EXPECT_EQ(UrlDecode("abc%3"), "abc%3"); + EXPECT_EQ(UrlDecode("bad%zztail"), "bad%zztail"); +} + +TEST(UrlEncodeDecodeTest, RoundTripsModelId) { + // The server must recover exactly what the SDK router encoded — this is the contract that the + // external-mode /models/{load,unload}/{model} handlers depend on. + const std::string model_id = "qwen2.5-0.5b-instruct-generic-cpu:4"; + EXPECT_EQ(UrlDecode(UrlEncode(model_id)), model_id); +} diff --git a/sdk_v2/cpp/test/internal_api/web_service_test.cc b/sdk_v2/cpp/test/internal_api/web_service_test.cc index 4b33be0a..6747f99d 100644 --- a/sdk_v2/cpp/test/internal_api/web_service_test.cc +++ b/sdk_v2/cpp/test/internal_api/web_service_test.cc @@ -14,6 +14,7 @@ #include "internal_api/web_service_test_helpers.h" #include "logger.h" #include "model.h" +#include "model_command_router.h" #include "model_info.h" #include "null_telemetry.h" #include "service/web_service.h" @@ -70,6 +71,10 @@ class WebServiceTest : public ::testing::Test { logger_ = std::make_unique(); ep_detector_ = std::make_unique(); model_load_manager_ = std::make_unique(*ep_detector_, *logger_); + // Router wraps the same load manager the web service uses, so model->IsLoaded() reflects + // loads driven through the HTTP routes. + model_command_router_ = std::make_unique( + std::nullopt, *model_load_manager_, "test", *logger_); session_manager_ = std::make_unique(*logger_); null_telemetry_ = std::make_unique(); catalog_ = std::make_unique(); @@ -77,10 +82,10 @@ class WebServiceTest : public ::testing::Test { // Populate with test models catalog_->AddModel(Model::FromModelInfo( test::MakeTestModelInfo("alpha-model", "acme-corp"), "", - svc_.download_manager, svc_.model_load_manager)); + svc_.download_manager, *model_command_router_)); catalog_->AddModel(Model::FromModelInfo( test::MakeTestModelInfo("beta-model", "contoso"), "", - svc_.download_manager, svc_.model_load_manager)); + svc_.download_manager, *model_command_router_)); const auto loadable_model_path = test::GetTestDataModelPath(test::kLoadableTestModelAlias); ASSERT_TRUE(std::filesystem::exists(loadable_model_path)) @@ -89,7 +94,7 @@ class WebServiceTest : public ::testing::Test { test::MakeTestModelInfo(test::kLoadableTestModelAlias, "microsoft"), loadable_model_path, svc_.download_manager, - *model_load_manager_)); + *model_command_router_)); service_ = std::make_unique(*catalog_, *logger_, "/tmp/test-cache", *model_load_manager_, *session_manager_, *null_telemetry_, []() {}); @@ -106,6 +111,8 @@ class WebServiceTest : public ::testing::Test { catalog_.reset(); session_manager_.reset(); null_telemetry_.reset(); + // Router references model_load_manager_, so destroy it first. + model_command_router_.reset(); model_load_manager_.reset(); ep_detector_.reset(); logger_.reset(); @@ -121,6 +128,7 @@ class WebServiceTest : public ::testing::Test { static std::unique_ptr ep_detector_; static std::unique_ptr logger_; static std::unique_ptr model_load_manager_; + static std::unique_ptr model_command_router_; static std::unique_ptr session_manager_; static std::unique_ptr null_telemetry_; static std::unique_ptr service_; @@ -133,6 +141,7 @@ std::unique_ptr WebServiceTest::catalog_; std::unique_ptr WebServiceTest::ep_detector_; std::unique_ptr WebServiceTest::logger_; std::unique_ptr WebServiceTest::model_load_manager_; +std::unique_ptr WebServiceTest::model_command_router_; std::unique_ptr WebServiceTest::session_manager_; std::unique_ptr WebServiceTest::null_telemetry_; std::unique_ptr WebServiceTest::service_; @@ -198,7 +207,7 @@ TEST_F(WebServiceTest, LoadedModelsReturnsModelIds) { } // ======================================================================== -// GET /models/load/{name} +// GET /models/load/{model} // ======================================================================== TEST_F(WebServiceTest, LoadModelReturnsNotFoundForUnknownModel) { @@ -216,7 +225,7 @@ TEST_F(WebServiceTest, LoadModelReturnsBadRequestWhenNotCached) { } // ======================================================================== -// GET /models/unload/{name} +// GET /models/unload/{model} // ======================================================================== TEST_F(WebServiceTest, UnloadModelReturnsNotFoundForUnknownModel) { @@ -289,7 +298,7 @@ TEST_F(WebServiceTest, OpenAIListModelsPopulatesPublisher) { } // ======================================================================== -// GET /v1/models/{name} — OpenAI-compatible retrieve +// GET /v1/models/{model} — OpenAI-compatible retrieve // ======================================================================== TEST_F(WebServiceTest, OpenAIRetrieveModelReturnsModelInfo) { diff --git a/sdk_v2/cpp/test/sdk_api/cache_only_test.cc b/sdk_v2/cpp/test/sdk_api/cache_only_test.cc index f15df2a9..4bffcf92 100644 --- a/sdk_v2/cpp/test/sdk_api/cache_only_test.cc +++ b/sdk_v2/cpp/test/sdk_api/cache_only_test.cc @@ -226,6 +226,28 @@ TEST_F(CacheOnlyTest, StartServiceThrowsInCacheOnlyMode) { } } +// Creating a local inference session is invalid in external-service mode: inference runs against +// the remote HTTP endpoints, not an in-process session. Session_CreateImpl must fail fast with +// INVALID_USAGE rather than failing indirectly when no local instance exists. The guard fires +// before any network call, so the dead external URL in MakeCacheOnlyConfig() is fine here. +TEST_F(CacheOnlyTest, CreateSessionThrowsInvalidUsageInExternalMode) { + WriteTwoModelCacheFile(); + + { + foundry_local::Manager manager(MakeCacheOnlyConfig()); + auto& catalog = manager.GetCatalog(); + auto model = catalog.GetModel("phi-4-mini-instruct"); + ASSERT_NE(model, nullptr); + + try { + foundry_local::ChatSession session(*model); + FAIL() << "Expected ChatSession construction to throw in external-service mode"; + } catch (const foundry_local::Error& e) { + EXPECT_EQ(e.Code(), FOUNDRY_LOCAL_ERROR_INVALID_USAGE); + } + } +} + TEST_F(CacheOnlyTest, EmptyCacheFileReturnsEmptyModelList) { WriteEmptyCacheFile(); diff --git a/sdk_v2/cpp/test/sdk_api/external_mode_test.cc b/sdk_v2/cpp/test/sdk_api/external_mode_test.cc new file mode 100644 index 00000000..5c9c93a1 --- /dev/null +++ b/sdk_v2/cpp/test/sdk_api/external_mode_test.cc @@ -0,0 +1,433 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// External-mode (client/server split) integration tests. +// +// Foundry Local supports an "external service URL" mode where a Manager acts as a thin client that +// forwards model lifecycle operations to a separate process running the embedded web service. The +// service exposes three model-lifecycle routes — GET /models/load/{model}, GET /models/unload/{model}, +// and GET /models/loaded; The SDK's public Model API issues these operations using the model_id +// (e.g. "tiny-random-gpt2-fp32:1") — see ModelCommandRouter, which forwards info_.model_id to +// GET /models/{load,unload}/{model}. The web service resolves {model} by model_id or alias for +// backwards compatibility. +// +// Why a separate process: foundry_local::Manager is a singleton — only one instance can exist at a +// time — so the web-service host (non-cache-only) and the external-mode client (cache-only) cannot +// coexist in the same process. The test binary re-executes itself with `--serve` to host the +// service, then runs the GTest suite in the parent process pointed at that server. +// +// Both processes share one cache directory containing a tiny, no-network-required model. The server +// (online) surfaces it in its catalog (via the cached-model merge path) and writes the catalog cache +// file the cache-only client reads, so both agree on the same model_id. When the host is offline or +// otherwise can't surface the model, the tests skip gracefully (consistent with the CI test policy). + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +// httplib transitively includes , which defines StartService as a macro +// (StartServiceA/StartServiceW). Undefine it so it can't shadow any foundry_local API identifier. +#undef StartService +#endif + +namespace fs = std::filesystem; + +namespace { + +// The tiny model's scanned model_id. LocalModelScanner derives this from the "Name" field of the +// model's inference_model.json; that value already carries a ':' version suffix, so the scanner uses +// it verbatim (it only appends ":0" when no ':' is present). Note this differs from the on-disk +// directory / alias name "tiny-random-gpt2-fp32-1". +constexpr const char* kTinyModelDirName = "tiny-random-gpt2-fp32-1"; +constexpr const char* kTinyModelId = "tiny-random-gpt2-fp32:1"; + +// Marker the host process prints once the web service is listening, followed by the URL it bound to. +// The host binds an ephemeral port (port 0), so the parent learns the actual port by parsing this +// line from the host's stdout. +constexpr const char* kServerReadyMarker = "EXTERNAL_MODE_SERVER_READY"; + +// Shared handle describing the spawned web-service host, populated by the global test environment. +struct ExternalServer { + bool available = false; ///< True once the host is reachable. + std::string url; ///< Base URL the host bound to, e.g. "http://127.0.0.1:54321". + std::string cache_dir; ///< Cache dir shared by host + client (contains the tiny model). + std::string exe_path; ///< This test binary's path, used to spawn additional hosts in tests. + + static ExternalServer& Get() { + static ExternalServer instance; + return instance; + } +}; + +// -------------------------------------------------------------------------------------------------- +// Host (`--serve`) mode +// -------------------------------------------------------------------------------------------------- + +int RunServer(int port, const std::string& cache_dir) { + try { + foundry_local::Configuration config("external_mode_test_server"); + config.SetModelCacheDir(cache_dir) + .AddWebServiceEndpoint("http://127.0.0.1:" + std::to_string(port)); + + foundry_local::Manager manager(std::move(config)); + manager.StartWebService(); + + // With port 0 the OS assigns an ephemeral port; report the URL the service actually bound to so + // the parent can connect to it. + auto endpoints = manager.GetWebServiceEndpoints(); + if (endpoints.empty()) { + std::cerr << "external-mode host: web service reported no endpoints" << std::endl; + return 1; + } + + // std::endl flushes, so the parent sees this line immediately. + std::cout << kServerReadyMarker << " " << endpoints.front() << std::endl; + + // Block until shutdown is requested (e.g. via GET /shutdown) or the parent terminates us. A + // bounded lifetime guards against orphaned hosts if the parent crashes before tear-down. + constexpr int kMaxLifetimeSeconds = 900; + for (int elapsed = 0; elapsed < kMaxLifetimeSeconds && !manager.IsShutdownRequested(); ++elapsed) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + // GET /shutdown only raises the shutdown flag (Manager::RequestShutdown); the loop above observes + // IsShutdownRequested() and breaks. Returning destroys the local `manager`, whose destructor runs + // the actual teardown (stop web service, drain, unload) on this thread — the one safe place to do + // it. There's nothing to stop explicitly here. + return 0; + } catch (const std::exception& ex) { + // Web service support is a compile-time option. Surface the reason and exit; the parent sees the + // stdout pipe hit EOF without a ready marker and the tests skip. + std::cerr << "external-mode host failed to start: " << ex.what() << std::endl; + return 1; + } +} + +// -------------------------------------------------------------------------------------------------- +// Process spawning / teardown (parent side) +// -------------------------------------------------------------------------------------------------- + +// Spawns the host with `--port 0`, piping its stdout so the parent can read back the single ready +// line (marker + ephemeral URL). The host's stderr is inherited by this process so its logs stay +// visible. reproc handles the platform-specific process and pipe plumbing; on success the running +// child is owned by `out`. +bool SpawnServer(reproc::process& out, const std::string& exe_path, const std::string& cache_dir) { + const std::vector args = {exe_path, "--serve", "--port", "0", "--cache", cache_dir}; + + reproc::options options; + // The host prints exactly one line to stdout (the ready marker + URL); read it back through a pipe. + // Everything else — SDK logs, host diagnostics — goes to stderr, which we inherit so it surfaces in + // the test console without anyone having to drain a second pipe. + options.redirect.out.type = reproc::redirect::pipe; + options.redirect.err.type = reproc::redirect::parent; + + // Safety net: if the process somehow outlives `out`, terminate (then kill) it on destruction so a + // crashed or aborted test can't leak a host process. + options.stop = {{reproc::stop::terminate, reproc::milliseconds(2000)}, + {reproc::stop::kill, reproc::milliseconds(2000)}, + {reproc::stop::noop, reproc::milliseconds(0)}}; + + std::error_code ec = out.start(args, options); + if (ec) { + std::cerr << "failed to spawn host process: " << ec.message() << std::endl; + return false; + } + + return true; +} + +// Reads the host's single line of stdout — the ready marker followed by the URL it bound to — and +// returns that URL. Returns empty if the host exits before printing the line or the timeout elapses. +std::string ReadServerUrl(reproc::process& server, std::chrono::seconds timeout) { + std::string buffer; + const auto deadline = std::chrono::steady_clock::now() + timeout; + + while (buffer.find('\n') == std::string::npos) { + const auto now = std::chrono::steady_clock::now(); + if (now >= deadline) { + return {}; + } + + const auto remaining = std::chrono::duration_cast(deadline - now); + + // Block until the host produces output or exits, bounded by the remaining timeout. + auto [events, poll_ec] = server.poll(reproc::event::out | reproc::event::exit, remaining); + if (poll_ec) { + return {}; + } + + if (events & reproc::event::out) { + uint8_t chunk[256]; + auto [got, read_ec] = server.read(reproc::stream::out, chunk, sizeof(chunk)); + if (read_ec) { + return {}; // stdout closed — the host exited before printing the marker. + } + buffer.append(reinterpret_cast(chunk), got); + } else if (events & reproc::event::exit) { + return {}; // Host exited before printing the marker. + } + } + + // The line is " ". operator>> skips whitespace and stops at it, so a trailing '\r' + // from Windows CRLF translation is dropped automatically. + std::istringstream iss(buffer); + std::string marker; + std::string url; + iss >> marker >> url; + return marker == kServerReadyMarker ? url : std::string{}; +} + +// Asks the host to shut down gracefully via GET /shutdown and reports whether it accepted the request +// (HTTP 200 with a "shutting_down" status). The host then drains and exits on its own. +bool RequestGracefulShutdown(const std::string& url) { + httplib::Client client(url); + client.set_connection_timeout(2, 0); + client.set_read_timeout(5, 0); + + auto res = client.Get("/shutdown"); + if (!res || res->status != 200) { + return false; + } + + return res->body.find("shutting_down") != std::string::npos; +} + +// Waits up to timeout for the host process to exit on its own. Returns true if it exited within the +// window. reproc reaps the process, so a later TerminateServer is a no-op. +bool WaitForServerExit(reproc::process& server, std::chrono::milliseconds timeout) { + auto [status, ec] = server.wait(std::chrono::duration_cast(timeout)); + (void)status; + return !ec; +} + +// Forcefully stops the host if it is still running (graceful terminate, then kill) and reaps it. +// No-op if the process already exited. +void TerminateServer(reproc::process& server) { + server.stop({{reproc::stop::terminate, reproc::milliseconds(2000)}, + {reproc::stop::kill, reproc::milliseconds(2000)}, + {reproc::stop::noop, reproc::milliseconds(0)}}); +} + +// Wait until the host answers GET /models/loaded with HTTP 200 (service up AND catalog ready), or +// the timeout elapses. +bool WaitForServerReady(const std::string& url, std::chrono::seconds timeout) { + httplib::Client client(url); + client.set_connection_timeout(2, 0); + client.set_read_timeout(5, 0); + + const auto deadline = std::chrono::steady_clock::now() + timeout; + while (std::chrono::steady_clock::now() < deadline) { + auto res = client.Get("/models/loaded"); + if (res && res->status == 200) { + return true; + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + return false; +} + +// Locate the tiny model source directory copied next to the binary at build time. +fs::path TinyModelSourceDir() { +#ifdef FOUNDRY_LOCAL_TEST_DATA_DIR + return fs::path(FOUNDRY_LOCAL_TEST_DATA_DIR) / kTinyModelDirName; +#else + return fs::path("testdata") / kTinyModelDirName; +#endif +} + +// -------------------------------------------------------------------------------------------------- +// Global environment: prepares the shared cache dir, spawns the host, probes readiness. +// -------------------------------------------------------------------------------------------------- + +class ExternalServerEnvironment : public ::testing::Environment { + public: + explicit ExternalServerEnvironment(std::string exe_path) : exe_path_(std::move(exe_path)) {} + + void SetUp() override { + auto src = TinyModelSourceDir(); + if (!fs::exists(src)) { + std::cerr << "external-mode tests: tiny model not found at " << src + << " — tests will skip" << std::endl; + return; + } + + // Unique cache dir per run so concurrent or repeated runs don't collide. + std::random_device rng; + cache_dir_ = fs::temp_directory_path() / ("fl_external_mode_test_" + std::to_string(rng())); + + std::error_code ec; + fs::remove_all(cache_dir_, ec); + fs::create_directories(cache_dir_ / kTinyModelDirName, ec); + fs::copy(src, cache_dir_ / kTinyModelDirName, + fs::copy_options::recursive | fs::copy_options::overwrite_existing, ec); + if (ec) { + std::cerr << "external-mode tests: failed to stage tiny model: " << ec.message() << std::endl; + return; + } + + if (!SpawnServer(server_proc_, exe_path_, cache_dir_.string())) { + return; + } + + // The host binds an ephemeral port and prints the resulting URL; read it back before probing. + std::string url = ReadServerUrl(server_proc_, std::chrono::seconds(120)); + if (url.empty()) { + std::cerr << "external-mode tests: host did not report a ready URL — tests will skip" << std::endl; + TerminateServer(server_proc_); + return; + } + + if (!WaitForServerReady(url, std::chrono::seconds(120))) { + std::cerr << "external-mode tests: host did not become ready — tests will skip" << std::endl; + TerminateServer(server_proc_); + return; + } + + auto& server = ExternalServer::Get(); + server.available = true; + server.url = url; + server.cache_dir = cache_dir_.string(); + server.exe_path = exe_path_; + } + + void TearDown() override { + auto& server = ExternalServer::Get(); + if (server.available) { + // Prefer a graceful shutdown via the web endpoint; TerminateServer below is the forceful + // fallback and also releases the process / pipe handles. + if (RequestGracefulShutdown(server.url)) { + WaitForServerExit(server_proc_, std::chrono::seconds(30)); + } + } + + TerminateServer(server_proc_); + + if (!cache_dir_.empty()) { + std::error_code ec; + fs::remove_all(cache_dir_, ec); + } + } + + private: + std::string exe_path_; + fs::path cache_dir_; + reproc::process server_proc_; +}; + +// -------------------------------------------------------------------------------------------------- +// Tests +// -------------------------------------------------------------------------------------------------- + +// End-to-end regression guard for the external-mode model_id routing fix (PR #839): the public +// Model::Load/IsLoaded/Unload path forwards model_id to the remote web service, which must resolve +// it by id. Before the fix Load() 404'd for every real model_id in external mode. +TEST(ExternalModeModelLifecycle, LoadIsLoadedUnloadRoundTrip) { + auto& server = ExternalServer::Get(); + if (!server.available) { + GTEST_SKIP() << "external-mode web service host not available (offline or build without service)"; + } + + foundry_local::Configuration config("external_mode_test_client"); + config.SetModelCacheDir(server.cache_dir) + .SetExternalServiceUrl(server.url); + + foundry_local::Manager manager(std::move(config)); + auto& catalog = manager.GetCatalog(); + + auto model = catalog.GetModelVariant(kTinyModelId); + if (!model) { + GTEST_SKIP() << "tiny model not present in external catalog (host offline?) — id " << kTinyModelId; + } + + // model_id must resolve and load end-to-end through the remote service. + ASSERT_FALSE(model->IsLoaded()); + ASSERT_NO_THROW(model->Load()); + EXPECT_TRUE(model->IsLoaded()); + + // The loaded list is reported as model_ids (Model::Id()); the model we just loaded must appear. + bool found = false; + for (const auto& loaded : catalog.GetLoadedModels()) { + if (loaded->GetInfo().Id() == kTinyModelId) { + found = true; + break; + } + } + EXPECT_TRUE(found) << "expected loaded list to contain '" << kTinyModelId << "'"; + + // Unload by model_id must also resolve and succeed. + ASSERT_NO_THROW(model->Unload()); + EXPECT_FALSE(model->IsLoaded()); +} + +// Real coverage for the graceful-shutdown contract: GET /shutdown must be accepted and must cause the +// host process to exit on its own, with no forceful kill. Uses a dedicated short-lived host so it is +// independent of the shared host other tests rely on (and robust to test ordering / shuffle). +TEST(ExternalModeWebService, ShutdownEndpointTerminatesHostGracefully) { + auto& shared = ExternalServer::Get(); + if (!shared.available) { + GTEST_SKIP() << "external-mode web service host not available (offline or build without service)"; + } + + reproc::process host; + ASSERT_TRUE(SpawnServer(host, shared.exe_path, shared.cache_dir)) << "failed to spawn a dedicated host"; + + std::string url = ReadServerUrl(host, std::chrono::seconds(120)); + if (url.empty()) { + TerminateServer(host); + GTEST_SKIP() << "dedicated host did not report a ready URL (offline?)"; + } + ASSERT_TRUE(WaitForServerReady(url, std::chrono::seconds(120))) << "dedicated host did not become ready"; + + // The endpoint must acknowledge the request... + EXPECT_TRUE(RequestGracefulShutdown(url)) << "host rejected GET /shutdown"; + + // ...and the host must then terminate on its own, with no forceful kill required. + EXPECT_TRUE(WaitForServerExit(host, std::chrono::seconds(30))) << "host did not exit after GET /shutdown"; + + TerminateServer(host); // Releases handles; forcefully kills only if the host is somehow still alive. +} + +} // namespace + +// -------------------------------------------------------------------------------------------------- +// main — dispatches to host mode (`--serve`) or test mode. +// -------------------------------------------------------------------------------------------------- + +int main(int argc, char** argv) { + int port = 0; + std::string cache_dir; + bool serve = false; + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg == "--serve") { + serve = true; + } else if (arg == "--port" && i + 1 < argc) { + port = std::atoi(argv[++i]); + } else if (arg == "--cache" && i + 1 < argc) { + cache_dir = argv[++i]; + } + } + + if (serve) { + return RunServer(port, cache_dir); + } + + ::testing::InitGoogleTest(&argc, argv); + ::testing::AddGlobalTestEnvironment(new ExternalServerEnvironment(argv[0])); + return RUN_ALL_TESTS(); +} diff --git a/sdk_v2/cpp/test/sdk_api/model_endpoints_test.cc b/sdk_v2/cpp/test/sdk_api/model_endpoints_test.cc index 07cf781d..c84a4117 100644 --- a/sdk_v2/cpp/test/sdk_api/model_endpoints_test.cc +++ b/sdk_v2/cpp/test/sdk_api/model_endpoints_test.cc @@ -100,3 +100,68 @@ TEST_F(WebServiceIntegrationTest, UnloadModelNotFound) { ASSERT_TRUE(result) << "HTTP request failed"; EXPECT_EQ(result->status, 404); } + +TEST_F(WebServiceIntegrationTest, LoadModelByModelIdReportsAlreadyLoaded) { + auto client = MakeClient(); + auto result = client.Get(("/models/load/" + model_id()).c_str()); + ASSERT_TRUE(result) << "HTTP request failed"; + ASSERT_NE(result->status, 404) << "model_id must resolve, not 404: " << result->body; + ASSERT_EQ(result->status, 200) << result->body; + + json response = json::parse(result->body); + ASSERT_TRUE(response.contains("status")); + EXPECT_EQ(response["status"], "already_loaded") + << "The SharedTestEnv chat model is resident, so load-by-id must be a no-op"; +} + +TEST_F(WebServiceIntegrationTest, ListLoadedModelsContainsModelId) { + auto client = MakeClient(); + auto result = client.Get("/models/loaded"); + ASSERT_TRUE(result) << "HTTP request failed"; + ASSERT_EQ(result->status, 200) << result->body; + + json response = json::parse(result->body); + ASSERT_TRUE(response.is_array()); + + // The loaded list is reported as model_ids (model->Id()); the resident chat model must appear. + bool found = false; + for (const auto& entry : response) { + if (entry.is_string() && entry.get() == model_id()) { + found = true; + break; + } + } + EXPECT_TRUE(found) << "Expected loaded list to contain '" << model_id() << "': " << result->body; +} + +TEST_F(WebServiceIntegrationTest, UnloadByModelIdResolvesForNonResidentModel) { + // Pick a catalog model_id that is NOT the resident chat model, so the unload is a resolvable + // no-op ("not_loaded") rather than a destructive unload of shared state. The point is that the + // id resolves at all — before the fix an id-form name 404'd on /models/unload/{model}. + auto client = MakeClient(); + + auto list = client.Get("/v1/models"); + ASSERT_TRUE(list) << "HTTP request failed"; + ASSERT_EQ(list->status, 200) << list->body; + + json models = json::parse(list->body); + ASSERT_TRUE(models.contains("data")); + + std::string other_id; + for (const auto& m : models["data"]) { + if (m.contains("id") && m["id"].get() != model_id()) { + other_id = m["id"].get(); + break; + } + } + ASSERT_FALSE(other_id.empty()) << "Expected at least one non-resident model in the catalog"; + + auto result = client.Get(("/models/unload/" + other_id).c_str()); + ASSERT_TRUE(result) << "HTTP request failed"; + ASSERT_NE(result->status, 404) << "model_id must resolve on unload, not 404: " << result->body; + ASSERT_EQ(result->status, 200) << result->body; + + json response = json::parse(result->body); + ASSERT_TRUE(response.contains("status")); + EXPECT_EQ(response["status"], "not_loaded"); +} diff --git a/sdk_v2/cpp/vcpkg.json b/sdk_v2/cpp/vcpkg.json index 781f3f2f..b59085db 100644 --- a/sdk_v2/cpp/vcpkg.json +++ b/sdk_v2/cpp/vcpkg.json @@ -24,6 +24,7 @@ "description": "Build unit tests", "dependencies": [ "gtest", + "reproc", { "name": "cpp-httplib", "features": ["openssl"] diff --git a/sdk_v2/js/test/_fixtures/cacheOnlyManager.ts b/sdk_v2/js/test/_fixtures/cacheOnlyManager.ts index e9986db7..5d0a3947 100644 --- a/sdk_v2/js/test/_fixtures/cacheOnlyManager.ts +++ b/sdk_v2/js/test/_fixtures/cacheOnlyManager.ts @@ -87,6 +87,13 @@ export interface CacheOnlyManagerFixture { export interface SetupOptions { /** Override the appName. Defaults to a fixed test id. */ readonly appName?: string; + /** + * Override the external service endpoint. Defaults to a dead loopback port so + * the catalog resolves from the local cache without contacting a real backend. + * Pass a live stub URL (see `loadedModelsStub.ts`) to exercise the external + * load-state routing (`getLoadedModels` / `isLoaded`). + */ + readonly serviceEndpoint?: string; } /** @@ -100,7 +107,7 @@ export function setupCacheOnlyManager(opts: SetupOptions = {}): CacheOnlyManager const config: FoundryLocalConfig = { appName: opts.appName ?? "foundry-local-js-sdk-v2-tests", modelCacheDir: tmpDir, - serviceEndpoint: "http://127.0.0.1:12345", + serviceEndpoint: opts.serviceEndpoint ?? "http://127.0.0.1:12345", }; const manager = FoundryLocalManager.create(config); return { manager, tmpDir }; diff --git a/sdk_v2/js/test/_fixtures/loadedModelsStub.ts b/sdk_v2/js/test/_fixtures/loadedModelsStub.ts new file mode 100644 index 00000000..47764d06 --- /dev/null +++ b/sdk_v2/js/test/_fixtures/loadedModelsStub.ts @@ -0,0 +1,66 @@ +// A minimal stand-in for the Foundry service's model-command surface — just +// enough for the v2 SDK's `ModelCommandRouter` (external mode) to talk to a +// real HTTP endpoint instead of a dead port. Lets cache-only tests exercise the +// *happy* path of the remote load-state queries (getLoadedModels / isLoaded) +// rather than only the unreachable-service failure. +// +// Only the endpoints the router actually calls are implemented (see +// `sdk_v2/cpp/src/model_command_router.cc`): +// GET /models/loaded -> JSON array of loaded model-id strings +// GET /models/load/{id} -> 200 (load acknowledged) +// GET /models/unload/{id} -> 200 (unload acknowledged) +// Any other path returns 404 so unexpected routing surfaces as a test failure +// instead of silently passing. +// +// The server runs in a worker thread (its own event loop). This is essential: +// the SDK's native `getLoadedModels`/`isLoaded` are *synchronous* and block the +// caller's thread for the duration of the HTTP request. An in-process server on +// the same event loop would deadlock — it could never accept the connection +// while the main thread is parked in native code. +import { fileURLToPath } from "node:url"; +import { Worker } from "node:worker_threads"; + +const workerPath = fileURLToPath(new URL("./loadedModelsStubWorker.cjs", import.meta.url)); + +export interface LoadedModelsStub { + /** Base URL to point the Manager at (no trailing slash). */ + readonly url: string; + /** Replace the set of loaded model ids reported by `GET /models/loaded`. */ + setLoaded(ids: readonly string[]): void; + /** Stop the server and join the worker. */ + close(): Promise; +} + +/** + * Start the stub on an ephemeral loopback port. Resolves once it is listening. + * + * @param initialLoaded ids reported as loaded before any {@link LoadedModelsStub.setLoaded} call. + */ +export async function startLoadedModelsStub(initialLoaded: readonly string[] = []): Promise { + const worker = new Worker(workerPath, { workerData: { initialLoaded: [...initialLoaded] } }); + + const port = await new Promise((resolve, reject) => { + worker.once("error", reject); + worker.once("message", (msg: { type?: string; port?: number }) => { + if (msg?.type === "listening" && typeof msg.port === "number") { + resolve(msg.port); + return; + } + + reject(new Error(`unexpected message from stub worker: ${JSON.stringify(msg)}`)); + }); + }); + + return { + url: `http://127.0.0.1:${port}`, + setLoaded(ids: readonly string[]): void { + worker.postMessage({ type: "setLoaded", ids: [...ids] }); + }, + close(): Promise { + return new Promise((resolve) => { + worker.once("exit", () => resolve()); + worker.postMessage({ type: "close" }); + }); + }, + }; +} diff --git a/sdk_v2/js/test/_fixtures/loadedModelsStubWorker.cjs b/sdk_v2/js/test/_fixtures/loadedModelsStubWorker.cjs new file mode 100644 index 00000000..aa2cfed2 --- /dev/null +++ b/sdk_v2/js/test/_fixtures/loadedModelsStubWorker.cjs @@ -0,0 +1,46 @@ +"use strict"; + +// Worker-thread body for the loaded-models stub. Runs on its own event loop so +// it can serve requests even while the SDK's *synchronous* native load-state +// calls (getLoadedModels / isLoaded) block the test's main thread. See +// loadedModelsStub.ts for the contract. +const http = require("node:http"); +const { workerData, parentPort } = require("node:worker_threads"); + +let loaded = Array.isArray(workerData && workerData.initialLoaded) ? workerData.initialLoaded.slice() : []; + +const server = http.createServer((req, res) => { + const path = (req.url || "").split("?")[0]; + + if (req.method === "GET" && path === "/models/loaded") { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify(loaded)); + return; + } + + if (req.method === "GET" && (path.startsWith("/models/load/") || path.startsWith("/models/unload/"))) { + res.writeHead(200, { "content-type": "application/json" }); + res.end("{}"); + return; + } + + res.writeHead(404, { "content-type": "text/plain" }); + res.end("not found"); +}); + +if (parentPort) { + parentPort.on("message", (msg) => { + if (msg && msg.type === "setLoaded") { + loaded = Array.isArray(msg.ids) ? msg.ids.slice() : []; + } else if (msg && msg.type === "close") { + // Force exit; libcurl may hold a keep-alive connection that would stall server.close(). + process.exit(0); + } + }); +} + +server.listen(0, "127.0.0.1", () => { + if (parentPort) { + parentPort.postMessage({ type: "listening", port: server.address().port }); + } +}); diff --git a/sdk_v2/js/test/catalog.test.ts b/sdk_v2/js/test/catalog.test.ts index 22074a57..c9bcf272 100644 --- a/sdk_v2/js/test/catalog.test.ts +++ b/sdk_v2/js/test/catalog.test.ts @@ -12,6 +12,7 @@ import { setupCacheOnlyManager, teardownCacheOnlyManager, } from "./_fixtures/cacheOnlyManager.js"; +import { type LoadedModelsStub, startLoadedModelsStub } from "./_fixtures/loadedModelsStub.js"; const describeIfBuilt = haveNativePrereqs ? describe : describe.skip; @@ -22,15 +23,21 @@ if (!haveNativePrereqs) { describeIfBuilt("Catalog (cache-only)", () => { let fixture: CacheOnlyManagerFixture; + let stub: LoadedModelsStub; let catalog: Catalog; - beforeAll(() => { - fixture = setupCacheOnlyManager({ appName: "foundry-local-js-sdk-v2-catalog-tests" }); + beforeAll(async () => { + stub = await startLoadedModelsStub(); + fixture = setupCacheOnlyManager({ + appName: "foundry-local-js-sdk-v2-catalog-tests", + serviceEndpoint: stub.url, + }); catalog = fixture.manager.catalog; }); - afterAll(() => { + afterAll(async () => { teardownCacheOnlyManager(fixture); + await stub.close(); }); it("name returns a non-empty string", () => { @@ -94,7 +101,9 @@ describeIfBuilt("Catalog (cache-only)", () => { } }); - it("getLoadedModels returns an empty array (nothing loaded)", async () => { + it("getLoadedModels routes to the external service and returns an empty array when nothing is loaded", async () => { + // The fixture points at a live stub service that reports no loaded models, so this exercises + // the external load-state routing end-to-end and resolves to an empty list. const loaded = await catalog.getLoadedModels(); expect(loaded).toEqual([]); }); diff --git a/sdk_v2/js/test/model.test.ts b/sdk_v2/js/test/model.test.ts index bbf0c801..11bb974f 100644 --- a/sdk_v2/js/test/model.test.ts +++ b/sdk_v2/js/test/model.test.ts @@ -13,6 +13,7 @@ import { setupCacheOnlyManager, teardownCacheOnlyManager, } from "./_fixtures/cacheOnlyManager.js"; +import { type LoadedModelsStub, startLoadedModelsStub } from "./_fixtures/loadedModelsStub.js"; const describeIfBuilt = haveNativePrereqs ? describe : describe.skip; @@ -23,17 +24,23 @@ if (!haveNativePrereqs) { describeIfBuilt("Model (cache-only)", () => { let fixture: CacheOnlyManagerFixture; + let stub: LoadedModelsStub; let catalog: Catalog; let model: Model; beforeAll(async () => { - fixture = setupCacheOnlyManager({ appName: "foundry-local-js-sdk-v2-model-tests" }); + stub = await startLoadedModelsStub(); + fixture = setupCacheOnlyManager({ + appName: "foundry-local-js-sdk-v2-model-tests", + serviceEndpoint: stub.url, + }); catalog = fixture.manager.catalog; model = (await catalog.getModel("phi-4-mini-instruct")) as Model; }); - afterAll(() => { + afterAll(async () => { teardownCacheOnlyManager(fixture); + await stub.close(); }); it("info returns a plain object with all required fields populated", () => { @@ -76,7 +83,9 @@ describeIfBuilt("Model (cache-only)", () => { expect(typeof model.isCached).toBe("boolean"); }); - it("isLoaded returns false (model is not actually loaded)", async () => { + it("isLoaded routes to the external service and returns false when the model is not loaded", async () => { + // The fixture points at a live stub service that reports no loaded models, so this exercises + // the external load-state routing end-to-end and resolves to false. expect(await model.isLoaded()).toBe(false); });