From bd1886288c9803b901958af2956f1c2612f16237 Mon Sep 17 00:00:00 2001 From: Esraa Kamel Date: Sun, 21 Jun 2026 12:16:14 +0300 Subject: [PATCH] Python: Fix Cosmos DB for MongoDB vector index kind CosmosMongoCollection._get_index_definitions set cosmosSearchOptions["kind"] from DISTANCE_FUNCTION_MAP_MONGODB (a similarity code, e.g. "COS") instead of INDEX_KIND_MAP_MONGODB (the index kind, e.g. "vector-hnsw"). As a result the created vector index used an invalid kind, "kind" equalled "similarity", and the HNSW/IVF/DiskANN tuning options (m, efConstruction, numList, maxDegree, lBuild) were silently dropped because the `match index_kind` block could never match. Use INDEX_KIND_MAP_MONGODB[field.index_kind] for the index kind and keep the distance-function map for "similarity". Update the existing index test (which asserted the buggy "COS" value) and add a test covering the tuning options. --- .../connectors/azure_cosmos_db.py | 2 +- ...test_azure_cosmos_db_mongodb_collection.py | 40 ++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/python/semantic_kernel/connectors/azure_cosmos_db.py b/python/semantic_kernel/connectors/azure_cosmos_db.py index 0da6f52b8346..848f4af1424a 100644 --- a/python/semantic_kernel/connectors/azure_cosmos_db.py +++ b/python/semantic_kernel/connectors/azure_cosmos_db.py @@ -398,7 +398,7 @@ def _get_index_definitions(self, **kwargs: Any) -> dict[str, Any]: f"Distance function '{field.distance_function}' is not supported by Azure Cosmos DB for MongoDB." ) index_name = f"{field.storage_name or field.name}_" - index_kind = DISTANCE_FUNCTION_MAP_MONGODB[field.distance_function] + index_kind = INDEX_KIND_MAP_MONGODB[field.index_kind] index: dict[str, Any] = { "name": index_name, FieldTypes.KEY: {field.storage_name or field.name: "cosmosSearch"}, diff --git a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py index 8da8a1ab08e8..62a2b6437d3b 100644 --- a/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py +++ b/python/tests/unit/connectors/memory/azure_cosmos_db/test_azure_cosmos_db_mongodb_collection.py @@ -98,11 +98,47 @@ async def test_ensure_collection_exists_calls_database_methods(definition) -> No # Check the vector field index creation assert command_args["indexes"][1]["name"] == "vector_" assert command_args["indexes"][1]["key"] == {"vector": "cosmosSearch"} - assert command_args["indexes"][1]["cosmosSearchOptions"]["kind"] == "COS" - assert command_args["indexes"][1]["cosmosSearchOptions"]["similarity"] is not None + # `kind` must be the vector index kind, `similarity` the distance function; they are distinct. + assert command_args["indexes"][1]["cosmosSearchOptions"]["kind"] == "vector-hnsw" + assert command_args["indexes"][1]["cosmosSearchOptions"]["similarity"] == "COS" + assert ( + command_args["indexes"][1]["cosmosSearchOptions"]["kind"] + != command_args["indexes"][1]["cosmosSearchOptions"]["similarity"] + ) assert command_args["indexes"][1]["cosmosSearchOptions"]["dimensions"] == 5 +async def test_ensure_collection_exists_applies_hnsw_tuning_options(definition) -> None: + """ + Test that HNSW tuning options (m, efConstruction) are forwarded into cosmosSearchOptions. + + The default `definition` fixture uses index_kind="hnsw", so the index `kind` must resolve to + "vector-hnsw" for the HNSW tuning block to apply. (Previously `kind` was set to the similarity + code "COS", so the tuning block never matched and these options were silently dropped.) + """ + mock_database = AsyncMock() + mock_database.create_collection = AsyncMock() + mock_database.command = AsyncMock() + + mock_client = AsyncMock(spec=AsyncMongoClient) + mock_client.get_database = MagicMock(return_value=mock_database) + + collection = CosmosMongoCollection( + collection_name="test_collection", + record_type=dict, + definition=definition, + mongo_client=mock_client, + database_name="test_db", + ) + + await collection.ensure_collection_exists(m=16, efConstruction=64) + + search_options = mock_database.command.call_args.kwargs["command"]["indexes"][1]["cosmosSearchOptions"] + assert search_options["kind"] == "vector-hnsw" + assert search_options["m"] == 16 + assert search_options["efConstruction"] == 64 + + async def test_context_manager_calls_aconnect_and_close_when_managed(mock_model) -> None: """ Test that the context manager in AzureCosmosDBforMongoDBCollection calls 'aconnect' and