diff --git a/sdk_v2/cpp/include/foundry_local/foundry_local_c.h b/sdk_v2/cpp/include/foundry_local/foundry_local_c.h index 0f61ab3a..045f3930 100644 --- a/sdk_v2/cpp/include/foundry_local/foundry_local_c.h +++ b/sdk_v2/cpp/include/foundry_local/foundry_local_c.h @@ -955,6 +955,20 @@ struct flCatalogApi { FL_API_STATUS(GetCachedModels, _In_ const flCatalog* catalog, _Outptr_ flModelList** out_models); FL_API_STATUS(GetLoadedModels, _In_ const flCatalog* catalog, _Outptr_ flModelList** out_models); + /// Get all versions of a model alias, optionally narrowed to a specific model name. + /// @param model_alias Alias of the model (e.g. "phi-4-mini"). Must be non-NULL and non-empty. + /// @param model_name Optional model name (ModelInfo.Name, e.g. "Phi-4-generic-gpu"). NULL returns + /// every model name. + /// @param max_versions Select latest X versions per model name. Pass 0 (or any + /// negative value) for no per-model-name cap. + /// Each call performs a fresh catalog query; results are not integrated into the + /// catalog's main lookup indices. Returned handles are owned by the catalog until + /// the next GetModelVersions call for the same alias or until the catalog is released. + /// Queries for different aliases do not invalidate each other's results. + /// Releasing the list does not invalidate the underlying model handles. 
+ FL_API_STATUS(GetModelVersions, _In_ const flCatalog* catalog, _In_ const char* model_alias, + _In_opt_ const char* model_name, int32_t max_versions, _Outptr_ flModelList** out_models); + // End V1 }; diff --git a/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.h b/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.h index f9a834e6..fe628130 100644 --- a/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.h +++ b/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.h @@ -765,6 +765,18 @@ class ICatalog { virtual std::unique_ptr GetModel(const std::string& alias) const = 0; virtual std::unique_ptr GetModelVariant(const std::string& model_id) const = 0; virtual std::unique_ptr GetLatestVersion(const IModel& model) const = 0; + + /// Get all versions of a model alias. `model_alias` must be non-empty. + /// `variant_name` optionally narrows the result to a single variant; empty + /// returns every variant. `max_versions` selects the latest X versions per + /// variant name (defaults to 50, matching the web service contract); pass 0 + /// or a negative value for no per-variant cap. Each call performs a fresh + /// query and the returned model handles remain valid until the next + /// GetModelVersions call for the same alias or until the catalog is destroyed. + /// Queries for different aliases do not invalidate each other's results. 
+ virtual ModelList GetModelVersions(const std::string& model_alias, + const std::string& variant_name = {}, + int max_versions = 50) = 0; }; // =========================================================================== @@ -787,6 +799,9 @@ class Catalog final : public ICatalog { std::unique_ptr GetModel(const std::string& alias) const override; std::unique_ptr GetModelVariant(const std::string& model_id) const override; std::unique_ptr GetLatestVersion(const IModel& model) const override; + ModelList GetModelVersions(const std::string& model_alias, + const std::string& variant_name = {}, + int max_versions = 50) override; private: detail::Base handle_; diff --git a/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.inline.h b/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.inline.h index f543d87a..08c8f594 100644 --- a/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.inline.h +++ b/sdk_v2/cpp/include/foundry_local/foundry_local_cpp.inline.h @@ -611,6 +611,19 @@ inline std::unique_ptr Catalog::GetLatestVersion(const IModel& model) co return std::make_unique(*m); } +inline ModelList Catalog::GetModelVersions(const std::string& model_alias, + const std::string& variant_name, + int max_versions) { + flModelList* models = nullptr; + Check(detail::catalog_api()->GetModelVersions( + handle_.get(), + model_alias.c_str(), + variant_name.empty() ? 
nullptr : variant_name.c_str(), + max_versions, + &models)); + return ModelList(*models); +} + // =========================================================================== // Item // =========================================================================== diff --git a/sdk_v2/cpp/src/c_api.cc b/sdk_v2/cpp/src/c_api.cc index 076a480c..dbf49cc0 100644 --- a/sdk_v2/cpp/src/c_api.cc +++ b/sdk_v2/cpp/src/c_api.cc @@ -687,6 +687,34 @@ FL_API_STATUS_IMPL(Catalog_GetNameImpl, const flCatalog* catalog, const char** o API_IMPL_END } +FL_API_STATUS_IMPL(Catalog_GetModelVersionsImpl, const flCatalog* catalog, + const char* model_alias, const char* model_name, + int32_t max_versions, flModelList** out_models) { + API_IMPL_BEGIN + if (!catalog || !model_alias || !out_models) { + return MakeStatus(FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT, "null argument"); + } + + if (*model_alias == '\0') { + return MakeStatus(FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT, "model_alias must not be empty"); + } + + std::string alias = model_alias; + std::string model_name_filter = model_name ? model_name : std::string{}; + + auto models = catalog->impl.GetModelVersions(alias, model_name_filter, max_versions); + auto list = std::make_unique(); + list->items.reserve(models.size()); + + for (auto* m : models) { + list->items.push_back(AsHandle(m)); + } + + *out_models = list.release(); + return nullptr; + API_IMPL_END +} + static const flCatalogApi g_catalog_api = { Catalog_GetNameImpl, Catalog_GetModelsImpl, @@ -695,6 +723,7 @@ static const flCatalogApi g_catalog_api = { Catalog_GetLatestVersionImpl, Catalog_GetCachedModelsImpl, Catalog_GetLoadedModelsImpl, + Catalog_GetModelVersionsImpl, }; // ======================================================================== diff --git a/sdk_v2/cpp/src/catalog.h b/sdk_v2/cpp/src/catalog.h index 8379aa3c..71aedbc1 100644 --- a/sdk_v2/cpp/src/catalog.h +++ b/sdk_v2/cpp/src/catalog.h @@ -31,6 +31,26 @@ class ICatalog { /// Gets the latest version of a model. 
Returns nullptr if not found. virtual Model* GetLatestVersion(const Model* model) const = 0; + /// Lists all known versions of a model (by alias), optionally filtered to a + /// specific variant name. Bypasses the "latest only" filter the regular + /// catalog refresh applies and performs a fresh source query on each call. + /// Results are not integrated into the catalog's main lookup indices. + /// Returned pointers are owned by the catalog until the next + /// GetModelVersions call for the same alias or until the catalog is destroyed. + /// Queries for different aliases do not invalidate each other's results. + /// + /// `model_alias` is the alias of the model (e.g. "phi-4-mini") and must not + /// be empty. + /// `variant_name` optionally narrows results to a specific variant (e.g. + /// "Phi-4-generic-gpu"). Pass an empty string to return every variant. + /// `max_versions` selects the latest X versions per variant name for the + /// alias. 0 or negative means no per-variant cap. + /// + /// Maps to C# `IModelCatalog.GetModelVersionsAsync`. + virtual std::vector GetModelVersions(const std::string& model_alias, + const std::string& variant_name, + int max_versions = 0) = 0; + /// Lists only models that are cached locally. virtual std::vector GetCachedModels() const = 0; diff --git a/sdk_v2/cpp/src/catalog/azure_catalog_client.cc b/sdk_v2/cpp/src/catalog/azure_catalog_client.cc index f39f66b4..af7a7acf 100644 --- a/sdk_v2/cpp/src/catalog/azure_catalog_client.cc +++ b/sdk_v2/cpp/src/catalog/azure_catalog_client.cc @@ -7,6 +7,8 @@ #include +#include +#include #include #include #include @@ -183,18 +185,34 @@ std::vector ToModelInfos(const std::vector& raw_mo return infos; } -std::vector> BuildSearchFilters(const IEpDetector& ep_detector, - const std::vector& model_filter) { +/// Build per-device filter sets for catalog queries. +/// `latest_only` controls whether to include the `labels=latest` filter (default true for latest models). 
+/// `model_alias` scopes results to a specific alias when non-empty; when empty, no alias filter is applied. +/// `model_name` scopes results to a specific model name when non-empty for server-side filtering. +/// Each filter set queries for variants on a specific device/EP pair; the catalog API matches on the +/// (device, execution provider) pair. +std::vector> BuildSearchFilters( + const IEpDetector& ep_detector, + const std::vector& model_filter, + bool latest_only = true, + const std::string& model_alias = "", + const std::string& model_name = "") { std::vector> filter_sets; - // One filter set per detected device. The catalog API matches on the - // (device, execution provider) pair, so we keep the EPs grouped by device. for (const auto& [device, eps] : ep_detector.GetAvailableDevicesToEPs()) { std::vector filters; filters.push_back(MakeFilter("type", {"models"})); filters.push_back(MakeFilter("kind", {"Versioned"})); - filters.push_back(MakeFilter("labels", {"latest"})); + if (latest_only) { + filters.push_back(MakeFilter("labels", {"latest"})); + } filters.push_back(MakeFilter("annotations/tags/foundryLocal", model_filter)); + if (!model_alias.empty()) { + filters.push_back(MakeFilter("annotations/tags/alias", {model_alias})); + } + if (!model_name.empty()) { + filters.push_back(MakeFilter("properties/name", {model_name})); + } filters.push_back(MakeFilter("properties/variantInfo/variantMetadata/device", {ToLower(device)})); filters.push_back(MakeFilter("properties/variantInfo/variantMetadata/executionProvider", eps)); filter_sets.push_back(std::move(filters)); @@ -203,6 +221,7 @@ std::vector> BuildSearchFilters(const IEpDetector& ep return filter_sets; } + std::vector BuildModelIdFilters(const std::vector& model_filter, const std::vector& model_ids) { // Looking up specific IDs: no labels=latest (we want exact versions) and no @@ -273,8 +292,7 @@ std::optional AzureCatalogClient::FetchFil if (regional && !pinned_region) { // Page 1: run through region 
fallback starting from the sticky region (last known-good) or the active region. // Exhaustion means every candidate had a retryable region-health failure, so fail just this filter set. - const std::string start = - region_fallback_.StickyRegion().value_or(region_); + const std::string start = region_fallback_.StickyRegion().value_or(region_); try { auto fallback_result = region_fallback_.Execute(start, [&](const std::string& r) { return http_post_response_(BuildRegionalUrl(url_prefix_, url_suffix_, r), body); @@ -384,6 +402,33 @@ std::vector AzureCatalogClient::FetchModelsByIds( return ToModelInfos(result->models, result->region); } +std::vector AzureCatalogClient::FetchAllVersionsByAlias( + const std::string& model_alias, + const std::string& model_name, + int /*max_versions*/) { + // Fetch all versions of the alias across per-device filter sets. Each filter set + // queries for variants matching the alias on a specific device/EP pair; the results + // are aggregated. The caller applies per-variant version caps (latest X per variant). 
+ const auto filter_sets = BuildSearchFilters(ep_detector_, model_filter_, /*latest_only=*/false, + model_alias, model_name); + + std::vector result; + + for (const auto& filters : filter_sets) { + auto walk = FetchFilterSet(filters); + if (!walk) { + continue; + } + + auto batch = ToModelInfos(walk->models, walk->region); + result.insert(result.end(), + std::make_move_iterator(batch.begin()), + std::make_move_iterator(batch.end())); + } + + return result; +} + std::unique_ptr MakeCatalogClient( const std::string& base_url, const std::string& filter_override, diff --git a/sdk_v2/cpp/src/catalog/azure_catalog_client.h b/sdk_v2/cpp/src/catalog/azure_catalog_client.h index 63df17ff..c87c650b 100644 --- a/sdk_v2/cpp/src/catalog/azure_catalog_client.h +++ b/sdk_v2/cpp/src/catalog/azure_catalog_client.h @@ -52,6 +52,15 @@ class AzureCatalogClient : public ICatalogClient { std::vector FetchModelsByIds(const std::vector& model_ids) override; + /// Fetch every version of `model_alias` from the live catalog by issuing the + /// per-device search with `labels=latest` removed and an alias filter added. + /// `model_alias` must be non-empty. Optionally filters by `model_name`. + /// The client fetches all matching versions, while the caller applies + /// `max_versions` semantics (latest X per variant). + std::vector FetchAllVersionsByAlias(const std::string& model_alias, + const std::string& model_name = "", + int max_versions = 0) override; + private: struct FetchedFilterSet { std::vector models; @@ -63,6 +72,7 @@ class AzureCatalogClient : public ICatalogClient { std::optional FetchFilterSet(const std::vector& filters); /// Fetch every device filter set, dropping the ones that failed their region-health checks. + /// Used for unbounded "latest only" / by-id queries. 
std::vector FetchAllFilterSets(); std::string base_url_; diff --git a/sdk_v2/cpp/src/catalog/azure_model_catalog.cc b/sdk_v2/cpp/src/catalog/azure_model_catalog.cc index 430e348a..39afcae3 100644 --- a/sdk_v2/cpp/src/catalog/azure_model_catalog.cc +++ b/sdk_v2/cpp/src/catalog/azure_model_catalog.cc @@ -11,6 +11,9 @@ #include #include +#include +#include + namespace fl { AzureModelCatalog::AzureModelCatalog(std::vector>> catalog_urls, @@ -30,6 +33,10 @@ AzureModelCatalog::AzureModelCatalog(std::vector(kDefaultCatalogFilter)); + } + logger_.Log(LogLevel::Information, fmt::format("Created AzureModelCatalog. Cache directory: {}", cache_dir_)); @@ -104,23 +111,13 @@ std::vector AzureModelCatalog::FetchModels() const { } }; - if (catalog_urls_.empty()) { - // Use default Azure Foundry catalog + for (const auto& [url, filter] : catalog_urls_) { try { - fetch_from(kDefaultCatalogUrl, kDefaultCatalogFilter); + fetch_from(url, filter); } catch (const std::exception& ex) { + // One failing URL shouldn't block others — skip and continue. logger_.Log(LogLevel::Error, - fmt::format("failed to fetch catalog from default URL: {}", ex.what())); - } - } else { - for (const auto& [url, filter] : catalog_urls_) { - try { - fetch_from(url, filter); - } catch (const std::exception& ex) { - // One failing URL shouldn't block others — skip and continue. - logger_.Log(LogLevel::Error, - fmt::format("failed to fetch catalog from {}: {}", url, ex.what())); - } + fmt::format("failed to fetch catalog from {}: {}", url, ex.what())); } } @@ -138,4 +135,90 @@ std::vector AzureModelCatalog::FetchModels() const { return models; } +std::vector AzureModelCatalog::FetchModelVersions( + const std::string& model_alias, + const std::string& model_name) const { + std::vector out; + if (cache_only_) { + // In cache-only mode we have no remote source to query for older versions. 
+ logger_.Log(LogLevel::Debug, + "FetchModelVersions skipped: catalog is in cache-only mode."); + return out; + } + + for (const auto& [url, filter] : catalog_urls_) { + try { + auto client = MakeCatalogClient(url, filter.value_or(""), ep_detector_, logger_, cache_dir_, + catalog_region_, disable_region_fallback_); + auto model_infos = client->FetchAllVersionsByAlias(model_alias, model_name); + + out.reserve(out.size() + model_infos.size()); + for (auto& info : model_infos) { + out.push_back(model_factory_(std::move(info), /*local_path=*/"")); + } + } catch (const std::exception& ex) { + logger_.Log(LogLevel::Error, + fmt::format("FetchModelVersions: failed to query {} — {}", url, ex.what())); + } + } + + logger_.Log(LogLevel::Information, + fmt::format("FetchModelVersions('{}') returned {} variant(s).", + model_alias, out.size())); + + return out; +} + +std::vector AzureModelCatalog::FetchModelsByIds(const std::vector& model_ids) const { + if (model_ids.empty()) { + return {}; + } + + if (cache_only_) { + logger_.Log(LogLevel::Debug, + "FetchModelsByIds skipped: catalog is in cache-only mode."); + return {}; + } + + auto local_models = ScanLocalModels(cache_dir_, logger_); + + std::vector models; + // Track which IDs are still unresolved so we can stop calling further + // endpoints once everything has been found. + std::vector remaining(model_ids); + + for (const auto& [url, filter] : catalog_urls_) { + if (remaining.empty()) { + break; + } + + try { + auto client = MakeCatalogClient(url, filter.value_or(""), ep_detector_, logger_, cache_dir_, + catalog_region_, disable_region_fallback_); + auto model_infos = client->FetchModelsByIds(remaining); + + for (auto& info : model_infos) { + std::string local_path; + auto it = local_models.find(info.model_id); + if (it != local_models.end()) { + local_path = it->second; + } + + // Drop this id from the remaining list now that it's resolved. 
+ auto rit = std::find(remaining.begin(), remaining.end(), info.model_id); + if (rit != remaining.end()) { + remaining.erase(rit); + } + + models.push_back(model_factory_(std::move(info), std::move(local_path))); + } + } catch (const std::exception& ex) { + logger_.Log(LogLevel::Error, + fmt::format("FetchModelsByIds: failed to query {} — {}", url, ex.what())); + } + } + + return models; +} + } // namespace fl diff --git a/sdk_v2/cpp/src/catalog/azure_model_catalog.h b/sdk_v2/cpp/src/catalog/azure_model_catalog.h index 9d837566..5769a3ef 100644 --- a/sdk_v2/cpp/src/catalog/azure_model_catalog.h +++ b/sdk_v2/cpp/src/catalog/azure_model_catalog.h @@ -33,6 +33,9 @@ class AzureModelCatalog : public BaseModelCatalog { protected: std::vector FetchModels() const override; + std::vector FetchModelVersions(const std::string& model_alias, + const std::string& model_name = "") const override; + std::vector FetchModelsByIds(const std::vector& model_ids) const override; private: static constexpr const char* kDefaultCatalogUrl = "https://ai.azure.com/api/centralus/ux/v1.0"; diff --git a/sdk_v2/cpp/src/catalog/base_model_catalog.cc b/sdk_v2/cpp/src/catalog/base_model_catalog.cc index af8b78cf..84463451 100644 --- a/sdk_v2/cpp/src/catalog/base_model_catalog.cc +++ b/sdk_v2/cpp/src/catalog/base_model_catalog.cc @@ -4,100 +4,22 @@ #include +#include "exception.h" + #include #include -#include #include #include +#include +#include namespace fl { -namespace { - -// --------------------------------------------------------------------------- -// Model priority sort — ports C# AzureFoundryService.CompareModelsForSort -// --------------------------------------------------------------------------- - -/// Case-insensitive substring search. 
-bool ContainsCaseInsensitive(const std::string& text, const std::string& pattern) { - auto it = std::search(text.begin(), text.end(), - pattern.begin(), pattern.end(), - [](char a, char b) { return std::tolower(static_cast(a)) == - std::tolower(static_cast(b)); }); - return it != text.end(); -} - -/// Extract device-type priority from model_id. -/// Format: -: -/// Returns: 0(NPU) < 1(vendor-GPU) < 2(CUDA-GPU) < 3(generic-GPU) -/// < 4(vendor-CPU) < 5(generic-CPU) < 6(unknown) -int GetModelDevicePriority(const std::string& model_id) { - if (ContainsCaseInsensitive(model_id, "-npu:")) { - return 0; - } - - // Check generic-gpu before -gpu: so "-generic-gpu:" isn't caught by the broader "-gpu:" check. - if (ContainsCaseInsensitive(model_id, "-generic-gpu:")) { - return 3; - } - - if (ContainsCaseInsensitive(model_id, "-cuda-gpu:")) { - return 2; - } - - if (ContainsCaseInsensitive(model_id, "-gpu:")) { - return 1; - } - - if (ContainsCaseInsensitive(model_id, "-generic-cpu:")) { - return 5; - } - - if (ContainsCaseInsensitive(model_id, "-cpu:")) { - return 4; - } - - return 6; -} - -/// Comparator for sorting variants by priority within the catalog. -/// Criteria (matching C# CompareModelsForSort): -/// 1. Device-type priority (ascending — lower number = better) -/// 2. Version number (descending — higher version first) -/// 3. 
CreatedAtUnix timestamp (descending — newer first) -bool CompareModelsForSort(const Model& m1, const Model& m2) { - const auto& info1 = m1.Info(); - const auto& info2 = m2.Info(); - - int p1 = GetModelDevicePriority(info1.model_id); - int p2 = GetModelDevicePriority(info2.model_id); - - if (p1 != p2) { - return p1 < p2; - } - - if (info1.version != info2.version) { - return info1.version > info2.version; - } - - int64_t created1 = info1.GetPropertyWithDefault(FOUNDRY_LOCAL_MODEL_PROP_CREATED_AT_UNIX_INT, int64_t{0}); - int64_t created2 = info2.GetPropertyWithDefault(FOUNDRY_LOCAL_MODEL_PROP_CREATED_AT_UNIX_INT, int64_t{0}); - - return created1 > created2; -} - -} // anonymous namespace - BaseModelCatalog::BaseModelCatalog(std::string name, ILogger& logger) : name_(std::move(name)), logger_(logger) {} BaseModelCatalog::~BaseModelCatalog() = default; void BaseModelCatalog::PopulateModels(std::vector variants) const { - // Sort variants by device priority (asc), version (desc), created_at (desc). - // This ensures the best variant ends up first in each alias group, matching the C# - // AzureFoundryService.SortModels() behavior. - std::stable_sort(variants.begin(), variants.end(), CompareModelsForSort); - // Group variants by alias into Model containers. // Matches C# Catalog.UpdateModels() pattern: // foreach (modelInfo) { find or create Model by alias, add variant } @@ -121,6 +43,10 @@ void BaseModelCatalog::PopulateModels(std::vector variants) const { } } + for (auto& [alias, model] : alias_to_model) { + model.SelectDefaultVariant(); + } + // On refresh: merge new models into stable storage. Existing models keep their addresses. // New aliases are appended. Existing aliases are left unchanged (their Model* stays valid). 
if (populated_) { @@ -162,6 +88,84 @@ void BaseModelCatalog::PopulateModels(std::vector variants) const { populated_ = true; } +void BaseModelCatalog::IntegrateVariants(std::vector variants) const { + std::lock_guard lock(mutex_); + + if (variants.empty()) { + return; + } + + // Build a lookup of existing aliases -> containers so we can merge new + // variants in O(1) per incoming variant. + std::unordered_map alias_to_existing; + for (auto& m : models_) { + alias_to_existing[m->Alias()] = m.get(); + } + + // Track existing model_ids in a single set so the dedup check is O(1) and + // doesn't require walking each container's variants per incoming variant. + std::unordered_set existing_ids; + for (auto& m : models_) { + for (auto* v : m->Variants()) { + existing_ids.insert(v->Info().model_id); + } + } + + size_t added_variants = 0; + size_t added_aliases = 0; + + // Collect-by-alias the variants that are actually new (not already known). + std::map> new_by_alias; + for (auto& v : variants) { + const auto& info = v.Info(); + if (info.model_id.empty() || info.alias.empty() || info.name.empty()) { + logger_.Log(LogLevel::Debug, + fmt::format("IntegrateVariants: skipping model with missing required fields: " + "id='{}', name='{}', alias='{}'.", + info.model_id, info.name, info.alias)); + continue; + } + + if (existing_ids.count(info.model_id) > 0) { + continue; + } + + existing_ids.insert(info.model_id); + new_by_alias[info.alias].push_back(std::move(v)); + } + + for (auto& [alias, alias_variants] : new_by_alias) { + auto it = alias_to_existing.find(alias); + if (it != alias_to_existing.end()) { + for (auto& v : alias_variants) { + it->second->AddVariant(std::move(v)); + ++added_variants; + } + } else { + // New alias: build a container and choose default after all variants are added. 
+ auto first = std::move(alias_variants.front()); + auto container = Model::MakeContainer(std::move(first)); + for (size_t i = 1; i < alias_variants.size(); ++i) { + container.AddVariant(std::move(alias_variants[i])); + } + + container.SelectDefaultVariant(); + + models_.push_back(std::make_unique(std::move(container))); + ++added_aliases; + added_variants += alias_variants.size(); + } + } + + if (added_variants > 0 || added_aliases > 0) { + logger_.Log(LogLevel::Information, + fmt::format("Catalog '{}' integrated {} new variant(s) across {} new alias(es). " + "{} total alias container(s).", + name_, added_variants, added_aliases, models_.size())); + RebuildIndex(); + } +} + void BaseModelCatalog::RebuildIndex() const { auto new_index = std::make_shared(); @@ -276,6 +280,42 @@ Model* BaseModelCatalog::GetModelVariant(const std::string& model_id) const { return id_it->second; } + // Not in cached indices — try a direct catalog lookup. Only attempt this when + // the input looks like a Model Id (Name + ":" + Version). Plain names and + // aliases would not succeed via FetchModelsByIds and just cost a network call. + // Mirrors C# BaseModelCatalog.GetModelInfoAsync direct-fetch fallback. + if (model_id.find(':') != std::string::npos) { + logger_.Log(LogLevel::Information, + fmt::format("GetModelVariant: '{}' not in cache, fetching from catalog source.", + model_id)); + + std::vector fetched; + try { + fetched = FetchModelsByIds({model_id}); + } catch (const std::exception& ex) { + logger_.Log(LogLevel::Warning, + fmt::format("GetModelVariant: direct fetch for '{}' failed — {}", + model_id, ex.what())); + return nullptr; + } catch (...) { + logger_.Log(LogLevel::Warning, + fmt::format("GetModelVariant: direct fetch for '{}' failed — unknown error", + model_id)); + return nullptr; + } + + if (!fetched.empty()) { + IntegrateVariants(std::move(fetched)); + + // Look up again from the refreshed index. 
+ idx = GetIndex(); + auto id_it2 = idx->id_index.find(model_id); + if (id_it2 != idx->id_index.end()) { + return id_it2->second; + } + } + } + logger_.Log(LogLevel::Information, fmt::format("GetModelVariant: '{}' not found in the catalog.", model_id)); @@ -329,4 +369,100 @@ std::vector BaseModelCatalog::GetLoadedModels() const { return result; } +std::vector BaseModelCatalog::GetModelVersions(const std::string& model_alias, + const std::string& variant_name, + int max_versions) { + if (model_alias.empty()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT, "GetModelVersions requires a non-empty model_alias."); + } + + // Make sure the regular "latest only" catalog is populated first so the + // existing alias set is available for logging/validation. + EnsurePopulated(); + + std::vector fetched; + try { + fetched = FetchModelVersions(model_alias, variant_name); // variant_name is used as model_name filter + } catch (const std::exception& ex) { + logger_.Log(LogLevel::Warning, + fmt::format("GetModelVersions: fetch for alias '{}' failed — {}", + model_alias, ex.what())); + return {}; + } catch (...) { + logger_.Log(LogLevel::Warning, + fmt::format("GetModelVersions: fetch for alias '{}' failed — unknown error", + model_alias)); + return {}; + } + + if (fetched.empty()) { + return {}; + } + + std::vector result; + auto idx = GetIndex(); + + { + std::lock_guard lock(mutex_); + + // Build a container Model for this alias, mirroring the structure used by + // PopulateModels / IntegrateVariants. AddVariant maintains best-first order. + auto container = Model::MakeContainer(std::move(fetched.front())); + for (size_t i = 1; i < fetched.size(); ++i) { + container.AddVariant(std::move(fetched[i])); + } + + container.SelectDefaultVariant(); + version_query_models_[model_alias] = std::make_unique(std::move(container)); + + // Return variant pointers from the container (like Model_GetVariantsImpl). 
+ auto variants = version_query_models_[model_alias]->Variants(); + result.reserve(variants.size()); + for (auto* v : variants) { + if (!variant_name.empty() && v->Info().name != variant_name) { + continue; + } + + result.push_back(v); + } + } + + if (result.empty()) { + // Source returned nothing — log when the alias was unknown. + auto alias_it = idx->alias_index.find(model_alias); + if (alias_it == idx->alias_index.end()) { + logger_.Log(LogLevel::Information, + fmt::format("GetModelVersions: alias '{}' not found in catalog.", model_alias)); + } + } + + // Sort into best-first order. Within the same variant name, device priority is identical, + // so this also groups by name with latest version first — suitable for max_versions capping. + std::stable_sort(result.begin(), result.end(), [](const Model* a, const Model* b) { + return Model::CompareBestFirst(*a, *b); + }); + + if (max_versions > 0 && !result.empty()) { + // Enforce latest N per variant name from the best-first ordering. + std::unordered_map selected_per_variant; + std::vector limited; + limited.reserve(result.size()); + + for (Model* model : result) { + const std::string& variant = model->Info().name; + int& count = selected_per_variant[variant]; + if (count >= max_versions) { + continue; + } + + ++count; + limited.push_back(model); + } + + result = std::move(limited); + } + + return result; +} + } // namespace fl diff --git a/sdk_v2/cpp/src/catalog/base_model_catalog.h b/sdk_v2/cpp/src/catalog/base_model_catalog.h index ad74d2f8..36229d68 100644 --- a/sdk_v2/cpp/src/catalog/base_model_catalog.h +++ b/sdk_v2/cpp/src/catalog/base_model_catalog.h @@ -21,7 +21,9 @@ namespace fl { /// Model ownership: The catalog owns all Model instances via unique_ptr in models_. /// These pointers are stable for the lifetime of the catalog — external code can hold /// raw Model* pointers safely. Indices (id_index, alias_index, name_index) are rebuilt -/// on refresh but always point into the stable models_ storage. 
+/// on refresh but always point into the stable models_ storage. GetModelVersions uses +/// separate transient storage because its results are query-only and not integrated into +/// the main indices. /// /// Maps to C# BaseModelCatalog. class BaseModelCatalog : public ICatalog { @@ -38,6 +40,9 @@ class BaseModelCatalog : public ICatalog { Model* GetLatestVersion(const Model* model) const override; std::vector GetCachedModels() const override; std::vector GetLoadedModels() const override; + std::vector GetModelVersions(const std::string& model_alias, + const std::string& variant_name, + int max_versions = 0) override; void InvalidateCache() override; protected: @@ -46,6 +51,27 @@ class BaseModelCatalog : public ICatalog { /// Maps to C# FetchModelInfoAsync. virtual std::vector FetchModels() const = 0; + /// Derived classes implement this to fetch all versions of a model from the + /// underlying catalog source, bypassing the "latest only" filter. + /// Returns the variants (in any order; the base class sorts/indexes them) + /// for the given alias. + /// Default implementation returns `{}` (no remote source — local-only catalogs). + /// Maps to C# `BaseModelCatalog.GetModelVersionsAsync` -> derived overrides. + virtual std::vector FetchModelVersions( + const std::string& /*model_alias*/, + const std::string& /*model_name*/ = "") const { + return {}; + } + + /// Derived classes implement this to look up specific model versions by ID + /// from the underlying catalog source (e.g., older versions not in the + /// latest catalog). Empty list if `model_ids` is empty. + /// Default implementation returns `{}`. + /// Maps to C# `BaseModelCatalog.FetchLocalModelsAsync`. + virtual std::vector FetchModelsByIds(const std::vector& /*model_ids*/) const { + return {}; + } + private: /// Lookup indices into the stable models_ storage. /// Rebuilt on refresh. Does not own any Model instances. 
@@ -79,6 +105,12 @@ class BaseModelCatalog : public ICatalog { /// Populate or refresh the catalog (under lock). Groups variants, builds indices. void PopulateModels(std::vector variants) const; + /// Merge new variants into the catalog's stable storage. For an + /// existing alias container, appends any variants whose model_id isn't already + /// present. For new aliases, creates a new container. Rebuilds the lookup + /// index when the model set actually changed. + void IntegrateVariants(std::vector variants) const; + /// Build lookup indices from the current models_ collection. /// Builds a complete new ModelIndex locally, then atomically swaps it into index_. void RebuildIndex() const; @@ -86,6 +118,13 @@ class BaseModelCatalog : public ICatalog { /// Thread-safe access: ensures catalog is populated, refreshes if allowed and stale. void EnsurePopulated(bool allow_refresh = false) const; + /// Per-alias transient storage for GetModelVersions queries. Each call replaces only the + /// entry for the queried alias, so pointers from prior queries on other aliases remain valid. + /// These models are intentionally not integrated into the main lookup indices. + /// Each entry is a container Model (created via MakeContainer) whose variants are the + /// individual version results — mirroring the structure used by the main models_ list. + mutable std::unordered_map> version_query_models_; + std::string name_; ILogger& logger_; }; diff --git a/sdk_v2/cpp/src/catalog/catalog_client.h b/sdk_v2/cpp/src/catalog/catalog_client.h index 7e4930de..e3afcfe7 100644 --- a/sdk_v2/cpp/src/catalog/catalog_client.h +++ b/sdk_v2/cpp/src/catalog/catalog_client.h @@ -27,6 +27,26 @@ class ICatalogClient { /// be resolved. virtual std::vector FetchModelsByIds( const std::vector& model_ids) = 0; + + /// Fetch all known versions of a model (by alias), bypassing the "latest only" + /// filter that `FetchAllModelInfos` applies. 
// ---------------------------------------------------------------------------
// Model priority sort — ports C# AzureFoundryService.CompareModelsForSort.
// ---------------------------------------------------------------------------

/// Reports whether `pattern` occurs anywhere in `text`, ignoring ASCII case.
///
/// An empty `pattern` is treated as contained in every string, including an
/// empty `text`. Comparing the iterator returned by std::search against
/// `text.end()` alone cannot express that: for an empty needle std::search
/// returns `text.begin()`, which equals `text.end()` when `text` is empty.
///
/// @param text     String to search in.
/// @param pattern  Substring to look for.
/// @return true if `pattern` is found in `text` under case-insensitive comparison.
bool ContainsCaseInsensitive(const std::string& text, const std::string& pattern) {
  if (pattern.empty()) {
    return true;
  }

  // A needle longer than the haystack can never match; skip the scan.
  if (pattern.size() > text.size()) {
    return false;
  }

  // Widen through unsigned char before std::tolower: passing a negative value
  // other than EOF to std::tolower is undefined behavior.
  const auto equal_ignoring_case = [](char a, char b) {
    return std::tolower(static_cast<unsigned char>(a)) ==
           std::tolower(static_cast<unsigned char>(b));
  };

  return std::search(text.begin(), text.end(), pattern.begin(), pattern.end(),
                     equal_ignoring_case) != text.end();
}

/// Extract device-type priority from model_id.
+/// Format: -: +/// Returns: 0(NPU) < 1(vendor-GPU) < 2(CUDA-GPU) < 3(generic-GPU) +/// < 4(vendor-CPU) < 5(generic-CPU) < 6(unknown) +int GetModelDevicePriority(const std::string& model_id) { + if (ContainsCaseInsensitive(model_id, "-npu:")) { + return 0; + } + + // Check generic-gpu before -gpu: so "-generic-gpu:" isn't caught by the broader "-gpu:" check. + if (ContainsCaseInsensitive(model_id, "-generic-gpu:")) { + return 3; + } + + if (ContainsCaseInsensitive(model_id, "-cuda-gpu:")) { + return 2; + } + + if (ContainsCaseInsensitive(model_id, "-gpu:")) { + return 1; + } + + if (ContainsCaseInsensitive(model_id, "-generic-cpu:")) { + return 5; + } + + if (ContainsCaseInsensitive(model_id, "-cpu:")) { + return 4; + } + + return 6; +} + +/// Comparator for sorting variants by priority within a container. +/// Criteria (matching C# CompareModelsForSort): +/// 1. Device-type priority (ascending — lower number = better) +/// 2. Version number (descending — higher version first) +/// 3. CreatedAtUnix timestamp (descending — newer first) +/// 4. 
model_id (ascending) as final tie-break +bool CompareModelsForSort(const Model& m1, const Model& m2) { + const auto& info1 = m1.Info(); + const auto& info2 = m2.Info(); + + int p1 = GetModelDevicePriority(info1.model_id); + int p2 = GetModelDevicePriority(info2.model_id); + + if (p1 != p2) { + return p1 < p2; + } + + if (info1.version != info2.version) { + return info1.version > info2.version; + } + + int64_t created1 = info1.GetPropertyWithDefault(FOUNDRY_LOCAL_MODEL_PROP_CREATED_AT_UNIX_INT, int64_t{0}); + int64_t created2 = info2.GetPropertyWithDefault(FOUNDRY_LOCAL_MODEL_PROP_CREATED_AT_UNIX_INT, int64_t{0}); + + if (created1 != created2) { + return created1 > created2; + } + + return info1.model_id < info2.model_id; +} + +} // namespace + // --------------------------------------------------------------------------- // Lifecycle // --------------------------------------------------------------------------- @@ -78,8 +162,8 @@ Model Model::FromModelInfo(ModelInfo info, Model Model::MakeContainer(Model first_variant) { Model container; - container.variants_.push_back(std::move(first_variant)); - container.selected_variant_ = &container.variants_.back(); + container.variants_.push_back(std::make_unique(std::move(first_variant))); + container.selected_variant_ = container.variants_.back().get(); return container; } @@ -90,20 +174,34 @@ void Model::AddVariant(Model variant) { std::lock_guard lock(state_mutex_); - // Prefer a cached variant over a non-cached selection (matches C# behavior). - bool prefer_new = variant.IsCached() && !selected_variant_->IsCached(); + auto pos = std::upper_bound(variants_.begin(), variants_.end(), variant, + [](const Model& value, const std::unique_ptr& element) { + return CompareModelsForSort(value, *element); + }); + + variants_.insert(pos, std::make_unique(std::move(variant))); +} - // Save selected variant's index before push_back (which may reallocate the vector, - // invalidating the old selected_variant_ pointer). 
- size_t selected_idx = static_cast(selected_variant_ - variants_.data()); +bool Model::CompareBestFirst(const Model& a, const Model& b) { + return CompareModelsForSort(a, b); +} - variants_.push_back(std::move(variant)); +void Model::SelectDefaultVariant() { + if (!IsContainer()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "SelectDefaultVariant called on a non-container Model; use MakeContainer first"); + } - if (prefer_new) { - selected_variant_ = &variants_.back(); - } else { - selected_variant_ = &variants_[selected_idx]; // Restore after potential reallocation + std::lock_guard lock(state_mutex_); + + for (auto& v : variants_) { + if (v->IsCached()) { + selected_variant_ = v.get(); + return; + } } + + selected_variant_ = variants_.front().get(); } // --------------------------------------------------------------------------- @@ -143,7 +241,7 @@ std::vector Model::Variants() const { for (auto& v : variants_) { // const_cast: the *set* of variants is fixed (this method is const), but each // variant is independently mutable. See header. - result.push_back(const_cast(&v)); + result.push_back(const_cast(v.get())); } } else { result.push_back(const_cast(this)); @@ -260,8 +358,8 @@ void Model::SelectVariant(const Model& variant) { } for (auto& v : variants_) { - if (&v == &variant) { - selected_variant_ = &v; + if (v.get() == &variant) { + selected_variant_ = v.get(); return; } } diff --git a/sdk_v2/cpp/src/model.h b/sdk_v2/cpp/src/model.h index ee666b2f..90710768 100644 --- a/sdk_v2/cpp/src/model.h +++ b/sdk_v2/cpp/src/model.h @@ -59,10 +59,22 @@ class Model { static Model MakeContainer(Model first_variant); /// Add a variant to this container. Requires IsContainer() to be true. - /// If the new variant is cached and the current selection is not, the new variant - /// becomes the selected variant (matches C# behavior). + /// The variant is inserted to keep the container's variant order best-first + /// (device priority asc, version desc, created-at desc). 
+ /// + /// Does not change the current selection. Call SelectDefaultVariant once the + /// container has its full variant set. void AddVariant(Model variant); + /// Choose the default selected variant from the current sorted variant list: + /// first cached variant if any, else the best variant. + /// Requires IsContainer() to be true. + void SelectDefaultVariant(); + + /// Best-first comparator: device priority asc, version desc, created-at desc, model_id asc. + /// Exposed so callers that return Model* lists can produce consistent ordering with Variants(). + static bool CompareBestFirst(const Model& a, const Model& b); + // --- Properties --- /// Unique model identifier. For containers, delegates to the selected variant. @@ -154,8 +166,9 @@ class Model { DownloadManager* download_manager_ = nullptr; ModelLoadManager* model_load_manager_ = nullptr; - // Container data (empty/null for leaves). - std::vector variants_; + // Container data (empty/null for leaves). unique_ptr keeps Model addresses + // stable across vector growth/reordering. + std::vector> variants_; Model* selected_variant_ = nullptr; // non-null = this is a container // Guards variants_ across reader/writer threads (catalog refresh adding variants diff --git a/sdk_v2/cpp/src/util/string_utils.h b/sdk_v2/cpp/src/util/string_utils.h index 14ed30a7..627a8880 100644 --- a/sdk_v2/cpp/src/util/string_utils.h +++ b/sdk_v2/cpp/src/util/string_utils.h @@ -48,4 +48,30 @@ inline bool EndsWithIgnoreCase(const std::string& str, const std::string& suffix }); } +/// Case-insensitive (ASCII) three-way comparison. +/// Returns: < 0 if lhs < rhs, 0 if equal, > 0 if lhs > rhs (all case-insensitive). 
/// Each byte is lowercased with std::tolower and ordered by its unsigned value,
/// so the result does not depend on whether plain `char` is signed on the target
/// platform. When one string is a case-insensitive prefix of the other, the
/// shorter string orders first.
///
/// @param lhs Left-hand string.
/// @param rhs Right-hand string.
/// @return -1 if lhs orders before rhs, 0 if they are equal, 1 otherwise.
inline int CompareCaseInsensitive(const std::string& lhs, const std::string& rhs) {
  const size_t common = std::min(lhs.size(), rhs.size());
  for (size_t i = 0; i < common; ++i) {
    // Compare the int results of std::tolower directly. Narrowing them back to
    // `char` would make bytes >= 0x80 negative wherever `char` is signed and sort
    // them ahead of ASCII, giving a platform-dependent order.
    const int l_lower = std::tolower(static_cast<unsigned char>(lhs[i]));
    const int r_lower = std::tolower(static_cast<unsigned char>(rhs[i]));
    if (l_lower != r_lower) {
      return l_lower < r_lower ? -1 : 1;
    }
  }

  // Common prefix is equal: the shorter string orders first.
  if (lhs.size() == rhs.size()) {
    return 0;
  }
  return lhs.size() < rhs.size() ? -1 : 1;
}
std::string& model_alias, + const std::string& model_name = "") const override { + std::vector result; + for (const auto& model : version_fetch_results_) { + const auto& info = model.Info(); + if (info.alias != model_alias) { + continue; + } + + if (!model_name.empty() && info.name != model_name) { + continue; + } + + result.push_back(MakeModel(info.model_id, info.name, info.version, info.alias, model.LocalPath())); + } + + return result; + } + + std::vector FetchModelsByIds(const std::vector& model_ids) const override { + std::unordered_set requested(model_ids.begin(), model_ids.end()); + std::vector result; + for (const auto& model : id_fetch_results_) { + const auto& info = model.Info(); + if (!requested.contains(info.model_id)) { + continue; + } + + result.push_back(MakeModel(info.model_id, info.name, info.version, info.alias, model.LocalPath())); + } + + return result; + } + + private: + mutable std::vector models_; + mutable std::vector version_fetch_results_; + mutable std::vector id_fetch_results_; +}; + // Helper: create a Model from basic fields. 
static Model MakeModel(const std::string& model_id, const std::string& name, int version, const std::string& alias, @@ -257,3 +323,104 @@ TEST_F(BaseModelCatalogTest, GetModelVariant_ById_ReturnsVariantNotContainer) { ASSERT_NE(container, nullptr); EXPECT_EQ(container->Variants().size(), 2u); } + +TEST_F(BaseModelCatalogTest, GetModelVersionsDoesNotIntegrateFetchedVariants) { + QueryingTestCatalog catalog(logger_); + catalog.AddModel(MakeModel("phi-3-mini:2", "phi-3-mini", 2, "phi-3")); + + std::vector version_results; + version_results.push_back(MakeModel("phi-3-mini:1", "phi-3-mini", 1, "phi-3")); + catalog.SetVersionFetchResults(std::move(version_results)); + + auto versions = catalog.GetModelVersions("phi-3", "", 0); + ASSERT_EQ(versions.size(), 1u); + EXPECT_EQ(versions[0]->Info().model_id, "phi-3-mini:1"); + + auto* container = catalog.GetModel("phi-3"); + ASSERT_NE(container, nullptr); + EXPECT_EQ(container->Variants().size(), 1u) + << "GetModelVersions should not add fetched versions to the catalog's main indices."; +} + +TEST_F(BaseModelCatalogTest, GetModelVersionsCrossAliasPointersRemainValid) { + QueryingTestCatalog catalog(logger_); + catalog.AddModel(MakeModel("phi-3-mini:1", "phi-3-mini", 1, "phi-3")); + catalog.AddModel(MakeModel("llama:1", "llama", 1, "llama")); + + // Seed version results for both aliases. + std::vector phi3_versions; + phi3_versions.push_back(MakeModel("phi-3-mini:1", "phi-3-mini", 1, "phi-3")); + phi3_versions.push_back(MakeModel("phi-3-mini:2", "phi-3-mini", 2, "phi-3")); + catalog.SetVersionFetchResults(std::move(phi3_versions)); + + // First query: phi-3 + auto phi3_result = catalog.GetModelVersions("phi-3", "", 0); + ASSERT_EQ(phi3_result.size(), 2u); + Model* phi3_ptr = phi3_result[0]; + + // Second query: llama — must not invalidate phi3_ptr. 
+ std::vector llama_versions; + llama_versions.push_back(MakeModel("llama:1", "llama", 1, "llama")); + llama_versions.push_back(MakeModel("llama:2", "llama", 2, "llama")); + catalog.SetVersionFetchResults(std::move(llama_versions)); + + auto llama_result = catalog.GetModelVersions("llama", "", 0); + ASSERT_EQ(llama_result.size(), 2u); + + // phi3_ptr must still be alive and accessible. + EXPECT_EQ(phi3_ptr->Info().alias, "phi-3") + << "Querying a different alias should not invalidate pointers from a prior GetModelVersions call."; +} + + TEST_F(BaseModelCatalogTest, GetModelVersionsMaxVersionsSelectsLatestRegardlessOfFetchOrder) { + QueryingTestCatalog catalog(logger_); + + std::vector version_results; + // Intentionally unsorted fetch order: v2, v1, v3. + version_results.push_back(MakeModel("phi-3-mini-generic-cpu:2", "phi-3-mini", 2, "phi-3")); + version_results.push_back(MakeModel("phi-3-mini-generic-cpu:1", "phi-3-mini", 1, "phi-3")); + version_results.push_back(MakeModel("phi-3-mini-generic-cpu:3", "phi-3-mini", 3, "phi-3")); + catalog.SetVersionFetchResults(std::move(version_results)); + + auto versions = catalog.GetModelVersions("phi-3", "", /*max_versions=*/1); + ASSERT_EQ(versions.size(), 1u); + EXPECT_EQ(versions.front()->Info().version, 3) + << "max_versions=1 should pick the latest version even when fetch order is arbitrary."; + } + +TEST_F(BaseModelCatalogTest, GetModelVariantIdIntegratesFetchedVariant) { + QueryingTestCatalog catalog(logger_); + catalog.AddModel(MakeModel("phi-3-mini:2", "phi-3-mini", 2, "phi-3")); + + std::vector id_results; + id_results.push_back(MakeModel("phi-3-mini:1", "phi-3-mini", 1, "phi-3")); + catalog.SetIdFetchResults(std::move(id_results)); + + auto* fetched = catalog.GetModelVariant("phi-3-mini:1"); + ASSERT_NE(fetched, nullptr); + EXPECT_EQ(fetched->Info().model_id, "phi-3-mini:1"); + + auto* container = catalog.GetModel("phi-3"); + ASSERT_NE(container, nullptr); + EXPECT_EQ(container->Variants().size(), 2u) + << 
"ID-based fetches should still integrate so download-specific lookups persist in the catalog."; +} + +TEST_F(BaseModelCatalogTest, GetModelVariantIdIntegrationPreservesPriorityOrdering) { + QueryingTestCatalog catalog(logger_); + catalog.AddModel(MakeModel("phi-3-mini-generic-cpu:1", "phi-3-mini", 1, "phi-3")); + + std::vector id_results; + id_results.push_back(MakeModel("phi-3-mini-npu:1", "phi-3-mini", 1, "phi-3")); + catalog.SetIdFetchResults(std::move(id_results)); + + auto* fetched = catalog.GetModelVariant("phi-3-mini-npu:1"); + ASSERT_NE(fetched, nullptr); + + auto* container = catalog.GetModel("phi-3"); + ASSERT_NE(container, nullptr); + auto variants = container->Variants(); + ASSERT_EQ(variants.size(), 2u); + EXPECT_EQ(variants.front()->Info().model_id, "phi-3-mini-npu:1") + << "Integrated variants should be re-sorted so higher-priority devices stay first."; +} diff --git a/sdk_v2/cpp/test/internal_api/c_api_test.cc b/sdk_v2/cpp/test/internal_api/c_api_test.cc index fda964bd..993b9ac9 100644 --- a/sdk_v2/cpp/test/internal_api/c_api_test.cc +++ b/sdk_v2/cpp/test/internal_api/c_api_test.cc @@ -5,7 +5,11 @@ #include #include #include +#include +#include +#include #include +#include // All tests go through the vtable obtained from FoundryLocalGetApi(). // Error handling: C API functions return flStatus* where nullptr == success (non-null == error with code + message). 
@@ -299,6 +303,435 @@ TEST(CApiTest, GetModelsFromCatalog) { api->Manager_Release(mgr); } +TEST(CApiTest, GetModelVersionsNullCatalogFails) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + + flModelList* models = nullptr; + flStatus* status = catalog_api->GetModelVersions(nullptr, "alias", nullptr, 0, &models); + ASSERT_NE(status, nullptr); + EXPECT_EQ(api->Status_GetErrorCode(status), FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT); + api->Status_Release(status); +} + +TEST(CApiTest, GetModelVersionsNullOutputFails) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + ASSERT_FL_OK(api, api->Manager_Create(config, &mgr)); + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + flStatus* status = catalog_api->GetModelVersions(cat, "alias", nullptr, 0, nullptr); + ASSERT_NE(status, nullptr); + EXPECT_EQ(api->Status_GetErrorCode(status), FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT); + api->Status_Release(status); + + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +TEST(CApiTest, GetModelVersionsUnknownAliasReturnsEmptyList) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + // Manager creation may fail if catalog is unreachable — skip gracefully. 
+ api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + flModelList* models = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVersions(cat, "definitely-not-a-real-alias", nullptr, 0, &models)); + ASSERT_NE(models, nullptr); + EXPECT_EQ(api->ModelList_Size(models), 0u); + api->ModelList_Release(models); + + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +TEST(CApiTest, GetModelVersionsEmptyAliasFails) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + flModelList* models = nullptr; + flStatus* get_versions_status = catalog_api->GetModelVersions(cat, "", nullptr, 0, &models); + ASSERT_NE(get_versions_status, nullptr); + EXPECT_EQ(api->Status_GetErrorCode(get_versions_status), FOUNDRY_LOCAL_ERROR_INVALID_ARGUMENT); + api->Status_Release(get_versions_status); + EXPECT_EQ(models, nullptr); + + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +TEST(CApiTest, GetModelVersionsForPhi3ReturnsAllVersions) { + // The canonical phi-3 family alias in the live Azure catalog is "phi-3-mini-4k", which is published + // with multiple versions (e.g. generic-gpu:2, generic-cpu:3). 
FetchAllVersionsByAlias drops the + // `labels=latest` filter, so this end-to-end call should return at least one variant tagged with that + // alias. The test is network-dependent and skips gracefully if the catalog is unreachable. + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + const flModelApi* model_api = api->GetModelApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + constexpr const char* kPhi3Alias = "phi-3-mini-4k"; + flModelList* models = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVersions(cat, kPhi3Alias, nullptr, 0, &models)); + ASSERT_NE(models, nullptr); + + const size_t count = api->ModelList_Size(models); + if (count == 0) { + api->ModelList_Release(models); + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); + GTEST_SKIP() << "Catalog returned no variants for '" << kPhi3Alias + << "' (catalog may be unreachable or the alias has been retired)"; + } + + // Every returned model must belong to the requested alias, and across the result set we expect to see + // more than one distinct version (the whole point of FetchAllVersionsByAlias is to bypass labels=latest). + // Print every (model_id, version) pair so failures and successes both show the live catalog content. 
+ std::cout << "[ ] GetModelVersions('" << kPhi3Alias << "') returned " << count + << " variant(s):\n"; + std::set distinct_versions; + for (size_t i = 0; i < count; ++i) { + flModel* model = api->ModelList_GetAt(models, i); + ASSERT_NE(model, nullptr); + const flModelInfo* info = nullptr; + ASSERT_FL_OK(api, model_api->GetInfo(model, &info)); + ASSERT_NE(info, nullptr); + + const char* alias = model_api->Info_GetAlias(info); + ASSERT_NE(alias, nullptr); + EXPECT_STREQ(alias, kPhi3Alias) << "Model at index " << i << " has unexpected alias"; + + const int version = model_api->Info_GetVersion(info); + EXPECT_GT(version, 0) << "Model at index " << i << " has non-positive version"; + distinct_versions.insert(version); + + const char* model_id = model_api->Info_GetId(info); + + // GetModelVersions is a metadata-only catalog query — by design it must NOT pull any model bits into + // the local cache. Verify each returned variant is reported as un-cached. + int cached = -1; + ASSERT_FL_OK(api, model_api->IsCached(model, &cached)); + EXPECT_EQ(cached, 0) << "Model at index " << i << " (model_id='" + << (model_id ? model_id : "(null)") + << "') unexpectedly appears in the local cache after GetModelVersions"; + + std::cout << "[" << i << "] model_id='" << (model_id ? model_id : "(null)") + << "' version=" << version << " cached=" << cached << "\n"; + } + + EXPECT_GE(distinct_versions.size(), 2u) + << "Expected at least two distinct versions for '" << kPhi3Alias + << "', got " << distinct_versions.size(); + + api->ModelList_Release(models); + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +// Verify max_versions is applied per variant name by comparing an unbounded +// result against max_versions=1 for the same alias. 
+TEST(CApiTest, GetModelVersionsMaxVersionsIsPerVariant) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + const flModelApi* model_api = api->GetModelApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + constexpr const char* kPhi3Alias = "phi-3-mini-4k"; + + // Reference: one unbounded call. + flModelList* baseline = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVersions(cat, kPhi3Alias, nullptr, 0, &baseline)); + ASSERT_NE(baseline, nullptr); + const size_t baseline_count = api->ModelList_Size(baseline); + std::cout << "[ ] Baseline GetModelVersions('" << kPhi3Alias + << "', max_versions=0) returned " << baseline_count << " variant(s):\n"; + if (baseline_count == 0) { + api->ModelList_Release(baseline); + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); + GTEST_SKIP() << "Catalog returned no variants for '" << kPhi3Alias << "'"; + } + + std::map> baseline_versions_by_variant; + for (size_t i = 0; i < baseline_count; ++i) { + flModel* m = api->ModelList_GetAt(baseline, i); + ASSERT_NE(m, nullptr); + const flModelInfo* info = nullptr; + ASSERT_FL_OK(api, model_api->GetInfo(m, &info)); + ASSERT_NE(info, nullptr); + const char* name = model_api->Info_GetName(info); + ASSERT_NE(name, nullptr); + const int version = model_api->Info_GetVersion(info); + baseline_versions_by_variant[name].insert(version); + + const char* model_id = model_api->Info_GetId(info); + std::cout << "[baseline " << i << "] model_id='" << (model_id ? 
model_id : "(null)") + << "' name='" << name << "' version=" << version << "\n"; + } + api->ModelList_Release(baseline); + + flModelList* capped = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVersions(cat, kPhi3Alias, nullptr, /*max_versions=*/1, &capped)); + ASSERT_NE(capped, nullptr); + + std::map> capped_versions_by_variant; + const size_t capped_count = api->ModelList_Size(capped); + std::cout << "[ ] Capped GetModelVersions('" << kPhi3Alias + << "', max_versions=1) returned " << capped_count << " variant(s):\n"; + for (size_t i = 0; i < capped_count; ++i) { + flModel* m = api->ModelList_GetAt(capped, i); + ASSERT_NE(m, nullptr); + const flModelInfo* info = nullptr; + ASSERT_FL_OK(api, model_api->GetInfo(m, &info)); + ASSERT_NE(info, nullptr); + const char* name = model_api->Info_GetName(info); + ASSERT_NE(name, nullptr); + const int version = model_api->Info_GetVersion(info); + capped_versions_by_variant[name].insert(version); + + const char* model_id = model_api->Info_GetId(info); + std::cout << "[capped " << i << "] model_id='" << (model_id ? 
model_id : "(null)") + << "' name='" << name << "' version=" << version << "\n"; + } + api->ModelList_Release(capped); + + ASSERT_FALSE(capped_versions_by_variant.empty()); + for (const auto& [variant, versions] : capped_versions_by_variant) { + ASSERT_EQ(versions.size(), 1u) << "variant='" << variant << "' should have at most one version"; + auto baseline_it = baseline_versions_by_variant.find(variant); + ASSERT_NE(baseline_it, baseline_versions_by_variant.end()) + << "variant='" << variant << "' missing from baseline"; + const int expected_latest = *baseline_it->second.rbegin(); + EXPECT_EQ(*versions.begin(), expected_latest) + << "variant='" << variant << "' did not return latest version"; + } + + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +TEST(CApiTest, GetModelVersionsDoesNotPersistReturnedVariantsIntoCatalogModelView) { + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + const flModelApi* model_api = api->GetModelApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + constexpr const char* kPhi3Alias = "phi-3-mini-4k"; + + flModel* baseline_model = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModel(cat, kPhi3Alias, &baseline_model)); + if (!baseline_model) { + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); + GTEST_SKIP() << "Catalog returned no grouped model for '" << kPhi3Alias + << "' (catalog may be unreachable or the alias has been retired)"; + } + + flModelList* baseline_variants = nullptr; + 
ASSERT_FL_OK(api, model_api->GetVariants(baseline_model, &baseline_variants)); + ASSERT_NE(baseline_variants, nullptr); + const size_t baseline_count = api->ModelList_Size(baseline_variants); + api->ModelList_Release(baseline_variants); + + flModelList* versions = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVersions(cat, kPhi3Alias, nullptr, 0, &versions)); + ASSERT_NE(versions, nullptr); + const size_t fetched_count = api->ModelList_Size(versions); + if (fetched_count == 0) { + api->ModelList_Release(versions); + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); + GTEST_SKIP() << "Catalog returned no variants for '" << kPhi3Alias << "'"; + } + + flModelList* after_variants = nullptr; + ASSERT_FL_OK(api, model_api->GetVariants(baseline_model, &after_variants)); + ASSERT_NE(after_variants, nullptr); + const size_t after_count = api->ModelList_Size(after_variants); + api->ModelList_Release(after_variants); + + EXPECT_EQ(after_count, baseline_count) + << "GetModelVersions should not add returned versions to the catalog's grouped model view."; + + api->ModelList_Release(versions); + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + +// Disabled by default: actually downloads a multi-GB model from Azure. Run manually with +// --gtest_also_run_disabled_tests when you need to exercise the Download code path against the live +// catalog. Skipped automatically in CI per the C++ test policy (no model downloads in CI). +TEST(CApiTest, DISABLED_DownloadPhi3MiniCpuV1) { + // Target the explicit ':1' variant ID: GetModelVariant uses the ID-fallback path that returns a single + // pinned variant rather than the alias' latest-labeled default. 
+ constexpr const char* kPhi3Alias = "phi-3-mini-4k"; + constexpr const char* kVariantId = "Phi-3-mini-4k-instruct-generic-cpu:1"; + constexpr int kExpectedVersion = 1; + + const flApi* api = GetApi(); + ASSERT_NE(api, nullptr); + const flCatalogApi* catalog_api = api->GetCatalogApi(); + const flModelApi* model_api = api->GetModelApi(); + + flConfiguration* config = CreateTestConfig(api); + ASSERT_NE(config, nullptr); + + flManager* mgr = nullptr; + flStatus* status = api->Manager_Create(config, &mgr); + if (!IsOk(status)) { + api->Status_Release(status); + api->GetConfigurationApi()->Configuration_Release(config); + GTEST_SKIP() << "Manager creation failed (catalog may be unreachable)"; + } + flCatalog* cat = nullptr; + ASSERT_FL_OK(api, api->Manager_GetCatalog(mgr, &cat)); + + flModel* model = nullptr; + ASSERT_FL_OK(api, catalog_api->GetModelVariant(cat, kVariantId, &model)); + ASSERT_NE(model, nullptr); + + // Sanity-check the variant we got back matches what we asked for. + const flModelInfo* info = nullptr; + ASSERT_FL_OK(api, model_api->GetInfo(model, &info)); + ASSERT_NE(info, nullptr); + EXPECT_STREQ(model_api->Info_GetId(info), kVariantId); + EXPECT_STREQ(model_api->Info_GetAlias(info), kPhi3Alias); + EXPECT_EQ(model_api->Info_GetVersion(info), kExpectedVersion); + + std::cout << "[ ] Target variant: model_id='" << kVariantId << "' alias='" + << kPhi3Alias << "' version=" << kExpectedVersion << "\n"; + + // Force a clean download path: if the model is already cached from a previous run, evict it first so + // we exercise the full network code path and get a meaningful progress stream. 
+ int cached = -1; + ASSERT_FL_OK(api, model_api->IsCached(model, &cached)); + if (cached) { + std::cout << "[ ] Pre-existing cache entry detected; calling RemoveFromCache to force a " + "fresh download.\n"; + ASSERT_FL_OK(api, model_api->RemoveFromCache(model)); + ASSERT_FL_OK(api, model_api->IsCached(model, &cached)); + ASSERT_EQ(cached, 0) << "RemoveFromCache did not clear the cache entry"; + } + + // The C ABI takes a plain function-pointer callback, so the lambda below is captureless; the local + // progress_values vector is passed through the user-data pointer so the trampoline can record each value. + std::vector progress_values; + auto progress_cb = +[](float value, void* user_data) -> int { + static_cast*>(user_data)->push_back(value); + return 0; // continue + }; + + std::cout << "[ ] Downloading " << kVariantId + << " (this may take a while; multi-GB transfer)...\n"; + ASSERT_FL_OK(api, model_api->Download(model, progress_cb, &progress_values)); + + // Confirm the cache flipped 0 -> 1 across Download. + ASSERT_FL_OK(api, model_api->IsCached(model, &cached)); + EXPECT_EQ(cached, 1) << "Model should be cached after a successful Download"; + + // A real download produces multiple progress callbacks, with the final at 100% and the sequence + // monotonically non-decreasing. + ASSERT_FALSE(progress_values.empty()) << "Download produced no progress callbacks"; + EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); + for (size_t i = 1; i < progress_values.size(); ++i) { + EXPECT_GE(progress_values[i], progress_values[i - 1]) + << "Progress went backwards at index " << i; + } + std::cout << "[ ] Download complete: " << progress_values.size() + << " progress callbacks, final value=" << progress_values.back() << "\n"; + + // Spot-check that GetPath now returns a usable filesystem path under the test cache dir. 
+ const char* path = nullptr; + ASSERT_FL_OK(api, model_api->GetPath(model, &path)); + ASSERT_NE(path, nullptr); + EXPECT_GT(std::strlen(path), 0u); + EXPECT_TRUE(std::filesystem::exists(path)) << "Reported model path does not exist: " << path; + std::cout << "[ ] GetPath -> '" << path << "'\n"; + + // Clean up so re-running the test from a fresh cache continues to exercise the download path, and so + // we don't litter the developer's disk with model bits after a manual run. + ASSERT_FL_OK(api, model_api->RemoveFromCache(model)); + ASSERT_FL_OK(api, model_api->IsCached(model, &cached)); + EXPECT_EQ(cached, 0) << "RemoveFromCache failed to evict the just-downloaded model"; + + api->GetConfigurationApi()->Configuration_Release(config); + api->Manager_Release(mgr); +} + // ======================================================================== // ModelList API // ======================================================================== diff --git a/sdk_v2/cpp/test/internal_api/web_service_test_helpers.h b/sdk_v2/cpp/test/internal_api/web_service_test_helpers.h index ade2bd52..318fc604 100644 --- a/sdk_v2/cpp/test/internal_api/web_service_test_helpers.h +++ b/sdk_v2/cpp/test/internal_api/web_service_test_helpers.h @@ -80,6 +80,26 @@ class MockCatalog : public ICatalog { return result; } + std::vector GetModelVersions(const std::string& model_alias, + const std::string& variant_name, + int max_versions = 0) override { /* Mock lookup: collects variants from each stored model whose alias equals model_alias (an empty alias matches every model), keeping only variants named variant_name unless that is empty. NOTE(review): the default of 0 here differs from the default of 50 that the ICatalog doc comment describes - confirm which default callers are meant to see. */ + std::vector result; + for (auto& m : models_) { + if (!model_alias.empty() && m.Alias() != model_alias) { + continue; + } + for (auto* v : m.Variants()) { + if (max_versions > 0 && result.size() >= static_cast(max_versions)) { /* NOTE(review): result.size() counts every variant collected so far, so this caps the total returned rather than the per-variant-name count the ICatalog doc describes - confirm this simplification is intended for the mock. */ + return result; + } + if (variant_name.empty() || v->Info().name == variant_name) { + result.push_back(v); + } + } + } + return result; + } + /// Add a model variant. Groups variants by alias into container models, /// matching BaseModelCatalog behavior. void AddModel(Model model) {