intel · ai-fw-intg · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 25, 2026
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
@@ -78,8 +78,8 @@ jobs:
       run: |
         set -e -x
         BINARY_SIZE_THRESHOLD_ARGS=""
-        echo "Binary size threshold in bytes: 1436672"
-        BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"
+        echo "Binary size threshold in bytes: 1440768"
+        BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1440768"
 
         # Ensure ANDROID_NDK_HOME is available and get its real path
         if [ -z "$ANDROID_NDK_HOME" ]; then

diff --git a/.github/workflows/linux_cuda_plugin_ci.yml b/.github/workflows/linux_cuda_plugin_ci.yml
@@ -141,3 +141,31 @@ jobs:
               cd /onnxruntime_src/onnxruntime/test/python/transformers
               python test_cuda_plugin_ep.py
             "
+
+      # --- Run the CUDA plugin EP C++ GoogleTest binary ---
+      # onnxruntime_provider_test is built into the artifact and links the plugin tests
+      # (gated by ORT_UNIT_TEST_HAS_CUDA_PLUGIN_EP). These tests register the plugin .so via
+      # GetSharedLibraryFileName("onnxruntime_providers_cuda_plugin"), which returns the
+      # platform-specific filename without a directory component. Run from /build/Release/Release
+      # so that filename resolves to the plugin .so built there.
+      # The filter covers every CUDA plugin EP suite linked into this binary:
+      #   CudaPlugin*              -> CudaPluginUserStreamGraphTest, CudaPluginArenaTest,
+      #                              CudaPluginPartitioningTest, CudaPluginProfilingTest
+      #   CudaResourcePartitioning* -> CudaResourcePartitioningTest
+      - name: Run CUDA Plugin EP C++ Tests
+        run: |
+          docker run --rm --gpus all \
+            -v ${{ github.workspace }}:/onnxruntime_src \
+            -v ${{ runner.temp }}/Release:/build/Release \
+            -e NVIDIA_VISIBLE_DEVICES=all \
+            ${{ steps.build_docker_image_step.outputs.full-image-name }} \
+            bash -c "
+              set -ex
+              export PATH=/opt/python/cp312-cp312/bin:\$PATH
+              # Make libcudart.so.13 (and the plugin's CUDA deps) findable; see note above. Append the inherited path only if set (an empty entry means cwd).
+              export LD_LIBRARY_PATH=/build/Release/Release:/usr/local/cuda-13.0/lib64\${LD_LIBRARY_PATH:+:\$LD_LIBRARY_PATH}
+
+              cd /build/Release/Release
+              ls -la onnxruntime_provider_test libonnxruntime_providers_cuda_plugin.so
+              ./onnxruntime_provider_test --gtest_filter='CudaPlugin*:CudaResourcePartitioning*'
+            "
diff --git a/.github/workflows/windows_cuda.yml b/.github/workflows/windows_cuda.yml
@@ -148,7 +148,7 @@ jobs:
       DocUpdateNeeded: false
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
 
   test:
     name: Windows GPU CUDA CI Pipeline Test Job
@@ -260,4 +260,4 @@ jobs:
       DocUpdateNeeded: false
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
diff --git a/.github/workflows/windows_cuda_plugin.yml b/.github/workflows/windows_cuda_plugin.yml
@@ -118,7 +118,7 @@ jobs:
       ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
 
   test:
     name: Windows CUDA Plugin EP Test
@@ -214,4 +214,4 @@ jobs:
       ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
diff --git a/.github/workflows/windows_gpu_doc_gen.yml b/.github/workflows/windows_gpu_doc_gen.yml
@@ -44,7 +44,7 @@ jobs:
       setVcvars: true
       ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
     runs-on: [
         "self-hosted",
         "1ES.Pool=onnxruntime-github-Win2022-GPU-A10",

diff --git a/.github/workflows/windows_openvino.yml b/.github/workflows/windows_openvino.yml
@@ -26,7 +26,7 @@ jobs:
     timeout-minutes: 240
     env:
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
       OnnxRuntimeBuildDirectory: ${{ github.workspace }}
       DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
       ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'

diff --git a/.github/workflows/windows_qnn_x64.yml b/.github/workflows/windows_qnn_x64.yml
@@ -29,7 +29,7 @@ jobs:
         QnnLibKind: [shared_lib, static_lib]
     env:
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
       DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
       ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'
 

diff --git a/.github/workflows/windows_tensorrt.yml b/.github/workflows/windows_tensorrt.yml
@@ -154,7 +154,7 @@ jobs:
       DocUpdateNeeded: false
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
 
   test:
     name: Windows GPU TensorRT CI Pipeline Test Job
@@ -265,4 +265,4 @@ jobs:
       DocUpdateNeeded: false
       ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
       AZCOPY_AUTO_LOGIN_TYPE: MSI
-      AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+      AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
diff --git a/cmake/deps.txt b/cmake/deps.txt
@@ -50,7 +50,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo
 psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
 pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
 pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae
-pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
+pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/4628dc060ce4e82345dc166bbac875609db4ff69.zip;e58d4b47c16a982111c897e669ae4f1821a393d7
 re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
 tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381

diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
@@ -371,9 +371,7 @@ if (CPUINFO_SUPPORTED)
       PATCH_COMMAND
         ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch &&
         # https://github.com/pytorch/cpuinfo/pull/324
-        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch &&
-        # https://github.com/pytorch/cpuinfo/pull/348
-        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
+        ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
       FIND_PACKAGE_ARGS NAMES cpuinfo
     )
   elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")

diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
@@ -56,6 +56,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
   ${MLAS_SRC_DIR}/sqnbitgemm_q8_block.h
   ${MLAS_SRC_DIR}/flashattn.cpp
   ${MLAS_SRC_DIR}/flashattn_qkv.cpp
+  ${MLAS_SRC_DIR}/flashattn_gqa.cpp
   ${MLAS_SRC_DIR}/qkv_quant.cpp
   ${MLAS_SRC_DIR}/cast.cpp
   ${MLAS_SRC_DIR}/layernorm.cpp

diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake
@@ -208,7 +208,7 @@
     target_compile_definitions(onnxruntime_providers_cuda PRIVATE FILE_NAME=\"onnxruntime_providers_cuda.dll\")
   endif()
 
-  # Work around a CUDA 13.x cudafe++ (EDG front-end) regression that mis-parses CCCL's
+  # Work around a CUDA 13.3 cudafe++ (EDG front-end) regression that mis-parses CCCL's
   # global-qualified partial specializations, e.g. in <cub/device/device_transform.cuh>:
   #   template <typename T>
   #   struct ::cuda::proclaims_copyable_arguments<...> : ::cuda::std::true_type {};
@@ -218,7 +218,7 @@
   # corrected copies of the affected headers into the build tree and place that directory
   # ahead of the toolkit cccl include path. This is a no-op on toolkits whose headers do not
   # contain the offending pattern (e.g. once NVIDIA fixes it), so it is safe to keep enabled.
-  function(ort_cuda13_patch_cccl_header src dst)
+  function(ort_cuda133_patch_cccl_header src dst)
     if (NOT EXISTS "${src}")
       return()
     endif()
@@ -412,19 +412,21 @@
     if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
       foreach(inc_dir ${CUDAToolkit_INCLUDE_DIRS})
         if (EXISTS "${inc_dir}/cccl")
-          # Generate cudafe++-parseable copies of the CCCL headers that contain global-qualified
-          # partial specializations (see ort_cuda13_patch_cccl_header above) and put the fixed
-          # directory ahead of the toolkit cccl include so the corrected headers win.
-          set(_ort_cccl_fix_dir "${CMAKE_CURRENT_BINARY_DIR}/cccl_cuda13_fix")
-          ort_cuda13_patch_cccl_header(
-            "${inc_dir}/cccl/cub/device/device_transform.cuh"
-            "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh")
-          ort_cuda13_patch_cccl_header(
-            "${inc_dir}/cccl/cub/device/dispatch/tuning/tuning_transform.cuh"
-            "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
-          if (EXISTS "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh" OR
-              EXISTS "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
-            target_include_directories(${target} BEFORE PRIVATE "${_ort_cccl_fix_dir}")
+          if (UNIX AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.3 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.4)
+            # Generate cudafe++-parseable copies of the CCCL headers that contain global-qualified
+            # partial specializations (see ort_cuda133_patch_cccl_header above) and put the fixed
+            # directory ahead of the toolkit cccl include so the corrected headers win.
+            set(_ort_cccl_fix_dir "${CMAKE_CURRENT_BINARY_DIR}/cccl_cuda13_fix")
+            ort_cuda133_patch_cccl_header(
+              "${inc_dir}/cccl/cub/device/device_transform.cuh"
+              "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh")
+            ort_cuda133_patch_cccl_header(
+              "${inc_dir}/cccl/cub/device/dispatch/tuning/tuning_transform.cuh"
+              "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
+            if (EXISTS "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh" OR
+                EXISTS "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
+              target_include_directories(${target} BEFORE PRIVATE "${_ort_cccl_fix_dir}")
+            endif()
           endif()
 
           # Add the cccl subdirectory to the include path so <cuda/std/utility> can be found

diff --git a/cmake/onnxruntime_providers_cuda_plugin.cmake b/cmake/onnxruntime_providers_cuda_plugin.cmake
@@ -88,10 +88,11 @@ list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/sequence_op\\.cc$")
 # in the CPU provider and is not linked into the plugin.
 list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/size\\.cc$")
 
-# Permanently excluded — pure CPU ops, handled by GetCpuPreferredNodes.
-# shape_op.cc inherits from onnxruntime::OpKernel (framework)
-# which cannot convert to ep::adapter::OpKernel in the plugin build.
-list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/shape_op\\.cc$")
+# shape_op.cc is INCLUDED in the plugin build. It provides an adapter-based
+# Shape kernel under #ifdef BUILD_CUDA_EP_AS_PLUGIN (the CPU onnxruntime::Shape
+# class, which derives from the framework OpKernel, is only used in the
+# non-plugin build). Registering Shape on the EP keeps it off the CPU EP and
+# avoids Memcpy nodes that would otherwise break CUDA Graph capture.
 
 # Exclude contrib training ops (shrunken_gather depends on provider_api.h in header).
 list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/contrib_ops/cuda/tensor/shrunken_gather\\.cc$")

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -603,6 +603,7 @@ set (onnxruntime_shared_lib_test_SRC
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_data_copy.cc
+          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ep_context_data_api.cc
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_experimental_api.cc
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
           ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc
@@ -1609,8 +1610,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
 
 endif()
 
-
-  if(onnxruntime_USE_QNN)
+  # Build ep_weight_sharing_ctx_gen for all supported EPs (QNN, TensorRT, OpenVINO, VitisAI)
+  if(onnxruntime_USE_QNN OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_VITISAI)
     #qnn ctx generator
     set(ep_weight_sharing_ctx_gen_src_dir ${TEST_SRC_DIR}/ep_weight_sharing_ctx_gen)
     set(ep_weight_sharing_ctx_gen_src_patterns
@@ -2174,6 +2175,7 @@ if (onnxruntime_BUILD_SHARED_LIB AND
   #
   file(GLOB onnxruntime_autoep_test_library_src "${TEST_SRC_DIR}/autoep/library/example_plugin_ep/*.h"
                                                 "${TEST_SRC_DIR}/autoep/library/example_plugin_ep/*.cc"
+                                                "${TEST_SRC_DIR}/autoep/library/ep_context_data_utils.h"
                                                 "${TEST_SRC_DIR}/autoep/library/plugin_ep_utils.h")
   onnxruntime_add_shared_library_module(example_plugin_ep ${onnxruntime_autoep_test_library_src})
   target_include_directories(example_plugin_ep PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session)

diff --git a/cmake/patches/cpuinfo/fix_missing_sysfs_fallback.patch b/cmake/patches/cpuinfo/fix_missing_sysfs_fallback.patch
@@ -1,10 +1,19 @@
 diff --git a/src/linux/processors.c b/src/linux/processors.c
-index 47bee76..d0c5569 100644
+index fd040a3..2ca8ec4 100644
 --- a/src/linux/processors.c
 +++ b/src/linux/processors.c
-@@ -2,0 +3 @@
+@@ -3,6 +3,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
 +#include <unistd.h>
-@@ -291,0 +293,22 @@
+
+ #if !defined(__ANDROID__)
+ /*
+@@ -289,6 +290,28 @@ static bool max_processor_number_parser(uint32_t processor_list_start, uint32_t
+ 	return true;
+ }
+
 +static uint32_t cpuinfo_linux_get_max_processor_from_sysconf(
 +	uint32_t max_processors_count,
 +	const char* processor_list_name) {
@@ -27,13 +36,31 @@ index 47bee76..d0c5569 100644
 +	return max_processor;
 +}
 +
-@@ -301 +324 @@
+ uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count) {
+ 	uint32_t max_possible_processor = 0;
+ 	if (!cpuinfo_linux_parse_cpulist(
+@@ -298,7 +321,7 @@ uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count)
+ #else
+ 		cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
+ #endif
 -		return UINT32_MAX;
 +		return cpuinfo_linux_get_max_processor_from_sysconf(max_processors_count, POSSIBLE_CPULIST_FILENAME);
-@@ -323 +346 @@
+ 	}
+ 	if (max_possible_processor >= max_processors_count) {
+ 		cpuinfo_log_warning(
+@@ -320,7 +343,7 @@ uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count)
+ #else
+ 		cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
+ #endif
 -		return UINT32_MAX;
 +		return cpuinfo_linux_get_max_processor_from_sysconf(max_processors_count, PRESENT_CPULIST_FILENAME);
-@@ -357,0 +381,31 @@
+ 	}
+ 	if (max_present_processor >= max_processors_count) {
+ 		cpuinfo_log_warning(
+@@ -355,6 +378,37 @@ static bool detect_processor_parser(uint32_t processor_list_start, uint32_t proc
+ 	return true;
+ }
+
 +static bool cpuinfo_linux_detect_processors_from_sysconf(
 +	uint32_t max_processors_count,
 +	uint32_t* processor0_flags,
@@ -65,19 +92,34 @@ index 47bee76..d0c5569 100644
 +	return true;
 +}
 +
-@@ -373 +427,6 @@
+ bool cpuinfo_linux_detect_possible_processors(
+ 	uint32_t max_processors_count,
+ 	uint32_t* processor0_flags,
+@@ -370,7 +424,12 @@ bool cpuinfo_linux_detect_possible_processors(
+ 		return true;
+ 	} else {
+ 		cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
 -		return false;
 +		return cpuinfo_linux_detect_processors_from_sysconf(
 +			max_processors_count,
 +			processor0_flags,
 +			processor_struct_size,
 +			possible_flag,
 +			POSSIBLE_CPULIST_FILENAME);
-@@ -392 +451,6 @@
+ 	}
+ }
+
+@@ -389,7 +448,12 @@ bool cpuinfo_linux_detect_present_processors(
+ 		return true;
+ 	} else {
+ 		cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
 -		return false;
 +		return cpuinfo_linux_detect_processors_from_sysconf(
 +			max_processors_count,
 +			processor0_flags,
 +			processor_struct_size,
 +			present_flag,
 +			PRESENT_CPULIST_FILENAME);
+ 	}
+ }
+
diff --git a/cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch b/cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
@@ -1,13 +1,13 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index aedc983..dab589e 100644
+index 072c987..e43d6ab 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
 @@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am
  ENDIF()
  IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$")
    SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
 +ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10")
-+  # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10.
++  # Use CMAKE_C_COMPILER_ARCHITECTURE_ID for non-VS generators (e.g. Ninja) with MSVC.
 +  IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86")
 +    SET(CPUINFO_TARGET_PROCESSOR "x86")
 +  ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64")

diff --git a/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch b/cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
diff --git a/cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch b/cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch
@@ -1,13 +1,13 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index aedc983..dab589e 100644
+index 072c987..e43d6ab 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
 @@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am
  ENDIF()
  IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$")
    SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
 +ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10")
-+  # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10.
++  # Use CMAKE_C_COMPILER_ARCHITECTURE_ID for non-VS generators (e.g. Ninja) with MSVC.
 +  IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86")
 +    SET(CPUINFO_TARGET_PROCESSOR "x86")
 +  ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64")