Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b77b312
Relax CompileModel validation to accept zero-input OrtModel graphs (#…
adrastogi Jun 24, 2026
eade9ea
Clamp derived sequence lengths and KV-cache index in CUDA GroupQueryA…
jiafatom Jun 24, 2026
996cea1
Add flash attention for non-quantized CPU GroupQueryAttention (#28962)
tianleiwu Jun 24, 2026
4ed5a4a
Update cpuinfo to include cpuinfo_deinitialize(), fix QNN ETW logging…
crvineeth97 Jun 25, 2026
3b022ec
[CUDA] Support user compute stream with CUDA graph in CUDA plugin EP …
tianleiwu Jun 25, 2026
d4b01e4
Fix CPU Attention causal mask alignment (#29050)
FuZoe Jun 25, 2026
92b4c66
webgpu: Enable FlashAttention for batched GQA with right-padded promp…
qjia7 Jun 26, 2026
a203dfa
[CPU] Add FP32 GEMV decode kernel for GroupQueryAttention (#29216)
tianleiwu Jun 26, 2026
0271287
Fix unbounded lifetime on WithOutputTensor in Rust bindings (#29251)
sayanshaw24 Jun 26, 2026
5f49a37
fix(ci): incorrect identity for azcopy (#29274)
sanaa-hamel-microsoft Jun 26, 2026
126ea8d
[CUDA Plugin EP] Expose kernel sync stream for scratch allocation (#2…
tianleiwu Jun 26, 2026
5786588
Validate SparseAttention CSR indices and key lengths element values (…
yuslepukhin Jun 26, 2026
37a3b51
Fix bounds in WhisperDecoderSubgraph::CreateInitialFeeds initial feed…
jiafatom Jun 26, 2026
a5d2663
[CPU] Enable pre-packed weights sharing for MatMulNBits (#29163)
derdeljan-msft Jun 26, 2026
d857f77
Fix integer overflow in RKNPU implicit bias allocation (#29249)
GopalakrishnanN Jun 26, 2026
37cccc8
Fix CUDA/cuDNN DLL preload paths for CUDA 13 consolidated wheel layou…
tianleiwu Jun 26, 2026
49efb32
Crypto support : App supply I/O callbacks to EP + callback and fallba…
GopalakrishnanN Jun 27, 2026
c65d9b6
Normalize bool tensor raw_data to {0, 1} on unpack (#29238)
jiafatom Jun 27, 2026
c3a5222
[CUDA] Remove unused code in moe_kernels.cu (#29295)
tianleiwu Jun 27, 2026
54b3fbc
Merge remote-tracking branch 'origin/master' into sync_msft_28062026
AIFrameworksIntegration Jun 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ jobs:
run: |
set -e -x
BINARY_SIZE_THRESHOLD_ARGS=""
echo "Binary size threshold in bytes: 1436672"
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1436672"
echo "Binary size threshold in bytes: 1440768"
BINARY_SIZE_THRESHOLD_ARGS="--threshold_size_in_bytes 1440768"

# Ensure ANDROID_NDK_HOME is available and get its real path
if [ -z "$ANDROID_NDK_HOME" ]; then
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/linux_cuda_plugin_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,31 @@ jobs:
cd /onnxruntime_src/onnxruntime/test/python/transformers
python test_cuda_plugin_ep.py
"

# --- Run the CUDA plugin EP C++ GoogleTest binary ---
# onnxruntime_provider_test is built into the artifact and links the plugin tests
# (gated by ORT_UNIT_TEST_HAS_CUDA_PLUGIN_EP). These tests register the plugin .so via
# GetSharedLibraryFileName("onnxruntime_providers_cuda_plugin"), which returns the
# platform-specific filename without a directory component. Run from /build/Release/Release
# so that filename resolves to the plugin .so built there.
# The filter covers every CUDA plugin EP suite linked into this binary:
# CudaPlugin* -> CudaPluginUserStreamGraphTest, CudaPluginArenaTest,
# CudaPluginPartitioningTest, CudaPluginProfilingTest
# CudaResourcePartitioning* -> CudaResourcePartitioningTest
- name: Run CUDA Plugin EP C++ Tests
run: |
docker run --rm --gpus all \
-v ${{ github.workspace }}:/onnxruntime_src \
-v ${{ runner.temp }}/Release:/build/Release \
-e NVIDIA_VISIBLE_DEVICES=all \
${{ steps.build_docker_image_step.outputs.full-image-name }} \
bash -c "
set -ex
export PATH=/opt/python/cp312-cp312/bin:\$PATH
# Make libcudart.so.13 (and the plugin's CUDA deps) findable; see note above.
export LD_LIBRARY_PATH=/build/Release/Release:/usr/local/cuda-13.0/lib64:\${LD_LIBRARY_PATH:-}

cd /build/Release/Release
ls -la onnxruntime_provider_test libonnxruntime_providers_cuda_plugin.so
./onnxruntime_provider_test --gtest_filter='CudaPlugin*:CudaResourcePartitioning*'
"
4 changes: 2 additions & 2 deletions .github/workflows/windows_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ jobs:
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e

test:
name: Windows GPU CUDA CI Pipeline Test Job
Expand Down Expand Up @@ -260,4 +260,4 @@ jobs:
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
4 changes: 2 additions & 2 deletions .github/workflows/windows_cuda_plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ jobs:
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e

test:
name: Windows CUDA Plugin EP Test
Expand Down Expand Up @@ -214,4 +214,4 @@ jobs:
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
2 changes: 1 addition & 1 deletion .github/workflows/windows_gpu_doc_gen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
setVcvars: true
ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows_openvino.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
timeout-minutes: 240
env:
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
OnnxRuntimeBuildDirectory: ${{ github.workspace }}
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/windows_qnn_x64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
QnnLibKind: [shared_lib, static_lib]
env:
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/windows_tensorrt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ jobs:
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e

test:
name: Windows GPU TensorRT CI Pipeline Test Job
Expand Down Expand Up @@ -265,4 +265,4 @@ jobs:
DocUpdateNeeded: false
ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/downlo
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/google/pthreadpool/archive/dcc9f28589066af0dbd4555579281230abbf74dd.zip;533a77943203ef15ca608bcd9dbe2c94da7451d2
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.zip;a064e663b4d7a337ac291d1bef7337ef4e60a1ae
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/403d652dca4c1046e8145950b1c0997a9f748b57.zip;30b2a07fe4bae8574f89176e56274cacdd6d135b
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/4628dc060ce4e82345dc166bbac875609db4ff69.zip;e58d4b47c16a982111c897e669ae4f1821a393d7
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
Expand Down
4 changes: 1 addition & 3 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,7 @@ if (CPUINFO_SUPPORTED)
PATCH_COMMAND
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_cpuinfo_h_for_arm64ec.patch &&
# https://github.com/pytorch/cpuinfo/pull/324
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch &&
# https://github.com/pytorch/cpuinfo/pull/348
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/win_arm_fp16_detection_fallback.patch
${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
FIND_PACKAGE_ARGS NAMES cpuinfo
)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/sqnbitgemm_q8_block.h
${MLAS_SRC_DIR}/flashattn.cpp
${MLAS_SRC_DIR}/flashattn_qkv.cpp
${MLAS_SRC_DIR}/flashattn_gqa.cpp
${MLAS_SRC_DIR}/qkv_quant.cpp
${MLAS_SRC_DIR}/cast.cpp
${MLAS_SRC_DIR}/layernorm.cpp
Expand Down
32 changes: 17 additions & 15 deletions cmake/onnxruntime_providers_cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@
target_compile_definitions(onnxruntime_providers_cuda PRIVATE FILE_NAME=\"onnxruntime_providers_cuda.dll\")
endif()
# Work around a CUDA 13.x cudafe++ (EDG front-end) regression that mis-parses CCCL's
# Work around a CUDA 13.3 cudafe++ (EDG front-end) regression that mis-parses CCCL's
# global-qualified partial specializations, e.g. in <cub/device/device_transform.cuh>:
# template <typename T>
# struct ::cuda::proclaims_copyable_arguments<...> : ::cuda::std::true_type {};
Expand All @@ -218,7 +218,7 @@
# corrected copies of the affected headers into the build tree and place that directory
# ahead of the toolkit cccl include path. This is a no-op on toolkits whose headers do not
# contain the offending pattern (e.g. once NVIDIA fixes it), so it is safe to keep enabled.
function(ort_cuda13_patch_cccl_header src dst)
function(ort_cuda133_patch_cccl_header src dst)
if (NOT EXISTS "${src}")
return()
endif()
Expand Down Expand Up @@ -412,19 +412,21 @@
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
foreach(inc_dir ${CUDAToolkit_INCLUDE_DIRS})
if (EXISTS "${inc_dir}/cccl")
# Generate cudafe++-parseable copies of the CCCL headers that contain global-qualified
# partial specializations (see ort_cuda13_patch_cccl_header above) and put the fixed
# directory ahead of the toolkit cccl include so the corrected headers win.
set(_ort_cccl_fix_dir "${CMAKE_CURRENT_BINARY_DIR}/cccl_cuda13_fix")
ort_cuda13_patch_cccl_header(
"${inc_dir}/cccl/cub/device/device_transform.cuh"
"${_ort_cccl_fix_dir}/cub/device/device_transform.cuh")
ort_cuda13_patch_cccl_header(
"${inc_dir}/cccl/cub/device/dispatch/tuning/tuning_transform.cuh"
"${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
if (EXISTS "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh" OR
EXISTS "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
target_include_directories(${target} BEFORE PRIVATE "${_ort_cccl_fix_dir}")
if (UNIX AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.3 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.4)
# Generate cudafe++-parseable copies of the CCCL headers that contain global-qualified
# partial specializations (see ort_cuda133_patch_cccl_header above) and put the fixed
# directory ahead of the toolkit cccl include so the corrected headers win.
set(_ort_cccl_fix_dir "${CMAKE_CURRENT_BINARY_DIR}/cccl_cuda13_fix")
ort_cuda133_patch_cccl_header(
"${inc_dir}/cccl/cub/device/device_transform.cuh"
"${_ort_cccl_fix_dir}/cub/device/device_transform.cuh")
ort_cuda133_patch_cccl_header(
"${inc_dir}/cccl/cub/device/dispatch/tuning/tuning_transform.cuh"
"${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
if (EXISTS "${_ort_cccl_fix_dir}/cub/device/device_transform.cuh" OR
EXISTS "${_ort_cccl_fix_dir}/cub/device/dispatch/tuning/tuning_transform.cuh")
target_include_directories(${target} BEFORE PRIVATE "${_ort_cccl_fix_dir}")
endif()
endif()
# Add the cccl subdirectory to the include path so <cuda/std/utility> can be found
Expand Down
9 changes: 5 additions & 4 deletions cmake/onnxruntime_providers_cuda_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/sequence_op\\.cc$")
# in the CPU provider and is not linked into the plugin.
list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/size\\.cc$")

# Permanently excluded — pure CPU ops, handled by GetCpuPreferredNodes.
# shape_op.cc inherits from onnxruntime::OpKernel (framework)
# which cannot convert to ep::adapter::OpKernel in the plugin build.
list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/tensor/shape_op\\.cc$")
# shape_op.cc is INCLUDED in the plugin build. It provides an adapter-based
# Shape kernel under #ifdef BUILD_CUDA_EP_AS_PLUGIN (the CPU onnxruntime::Shape
# class, which derives from the framework OpKernel, is only used in the
# non-plugin build). Registering Shape on the EP keeps it off the CPU EP and
# avoids Memcpy nodes that would otherwise break CUDA Graph capture.

# Exclude contrib training ops (shrunken_gather depends on provider_api.h in header).
list(FILTER CUDA_PLUGIN_EP_CC_SRCS EXCLUDE REGEX ".*/contrib_ops/cuda/tensor/shrunken_gather\\.cc$")
Expand Down
6 changes: 4 additions & 2 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ set (onnxruntime_shared_lib_test_SRC
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_data_copy.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ep_context_data_api.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_experimental_api.cc
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc
Expand Down Expand Up @@ -1609,8 +1610,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)

endif()


if(onnxruntime_USE_QNN)
# Build ep_weight_sharing_ctx_gen for all supported EPs (QNN, TensorRT, OpenVINO, VitisAI)
if(onnxruntime_USE_QNN OR onnxruntime_USE_TENSORRT OR onnxruntime_USE_OPENVINO OR onnxruntime_USE_VITISAI)
# EP weight-sharing context generator (originally QNN-only)
set(ep_weight_sharing_ctx_gen_src_dir ${TEST_SRC_DIR}/ep_weight_sharing_ctx_gen)
set(ep_weight_sharing_ctx_gen_src_patterns
Expand Down Expand Up @@ -2174,6 +2175,7 @@ if (onnxruntime_BUILD_SHARED_LIB AND
#
file(GLOB onnxruntime_autoep_test_library_src "${TEST_SRC_DIR}/autoep/library/example_plugin_ep/*.h"
"${TEST_SRC_DIR}/autoep/library/example_plugin_ep/*.cc"
"${TEST_SRC_DIR}/autoep/library/ep_context_data_utils.h"
"${TEST_SRC_DIR}/autoep/library/plugin_ep_utils.h")
onnxruntime_add_shared_library_module(example_plugin_ep ${onnxruntime_autoep_test_library_src})
target_include_directories(example_plugin_ep PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session)
Expand Down
58 changes: 50 additions & 8 deletions cmake/patches/cpuinfo/fix_missing_sysfs_fallback.patch
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
diff --git a/src/linux/processors.c b/src/linux/processors.c
index 47bee76..d0c5569 100644
index fd040a3..2ca8ec4 100644
--- a/src/linux/processors.c
+++ b/src/linux/processors.c
@@ -2,0 +3 @@
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
@@ -291,0 +293,22 @@

#if !defined(__ANDROID__)
/*
@@ -289,6 +290,28 @@ static bool max_processor_number_parser(uint32_t processor_list_start, uint32_t
return true;
}

+static uint32_t cpuinfo_linux_get_max_processor_from_sysconf(
+ uint32_t max_processors_count,
+ const char* processor_list_name) {
Expand All @@ -27,13 +36,31 @@ index 47bee76..d0c5569 100644
+ return max_processor;
+}
+
@@ -301 +324 @@
uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count) {
uint32_t max_possible_processor = 0;
if (!cpuinfo_linux_parse_cpulist(
@@ -298,7 +321,7 @@ uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count)
#else
cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
#endif
- return UINT32_MAX;
+ return cpuinfo_linux_get_max_processor_from_sysconf(max_processors_count, POSSIBLE_CPULIST_FILENAME);
@@ -323 +346 @@
}
if (max_possible_processor >= max_processors_count) {
cpuinfo_log_warning(
@@ -320,7 +343,7 @@ uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count)
#else
cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
#endif
- return UINT32_MAX;
+ return cpuinfo_linux_get_max_processor_from_sysconf(max_processors_count, PRESENT_CPULIST_FILENAME);
@@ -357,0 +381,31 @@
}
if (max_present_processor >= max_processors_count) {
cpuinfo_log_warning(
@@ -355,6 +378,37 @@ static bool detect_processor_parser(uint32_t processor_list_start, uint32_t proc
return true;
}

+static bool cpuinfo_linux_detect_processors_from_sysconf(
+ uint32_t max_processors_count,
+ uint32_t* processor0_flags,
Expand Down Expand Up @@ -65,19 +92,34 @@ index 47bee76..d0c5569 100644
+ return true;
+}
+
@@ -373 +427,6 @@
bool cpuinfo_linux_detect_possible_processors(
uint32_t max_processors_count,
uint32_t* processor0_flags,
@@ -370,7 +424,12 @@ bool cpuinfo_linux_detect_possible_processors(
return true;
} else {
cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME);
- return false;
+ return cpuinfo_linux_detect_processors_from_sysconf(
+ max_processors_count,
+ processor0_flags,
+ processor_struct_size,
+ possible_flag,
+ POSSIBLE_CPULIST_FILENAME);
@@ -392 +451,6 @@
}
}

@@ -389,7 +448,12 @@ bool cpuinfo_linux_detect_present_processors(
return true;
} else {
cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME);
- return false;
+ return cpuinfo_linux_detect_processors_from_sysconf(
+ max_processors_count,
+ processor0_flags,
+ processor_struct_size,
+ present_flag,
+ PRESENT_CPULIST_FILENAME);
}
}

4 changes: 2 additions & 2 deletions cmake/patches/cpuinfo/patch_vcpkg_arm64ec_support.patch
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aedc983..dab589e 100644
index 072c987..e43d6ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am
ENDIF()
IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$")
SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
+ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10")
+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10.
+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID for non-VS generators (e.g. Ninja) with MSVC.
+ IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86")
+ SET(CPUINFO_TARGET_PROCESSOR "x86")
+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64")
Expand Down
19 changes: 0 additions & 19 deletions cmake/patches/cpuinfo/win_arm_fp16_detection_fallback.patch

This file was deleted.

4 changes: 2 additions & 2 deletions cmake/vcpkg-ports/cpuinfo/patch_vcpkg_arm64ec_support.patch
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aedc983..dab589e 100644
index 072c987..e43d6ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,17 @@ IF(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND CPUINFO_TARGET_PROCESSOR STREQUAL "am
ENDIF()
IF(IS_APPLE_OS AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64.*)$")
SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
+ELSEIF(MSVC AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10")
+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID. MSVC values are documented as available since CMake 3.10.
+ # Use CMAKE_C_COMPILER_ARCHITECTURE_ID for non-VS generators (e.g. Ninja) with MSVC.
+ IF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "X86")
+ SET(CPUINFO_TARGET_PROCESSOR "x86")
+ ELSEIF(CMAKE_C_COMPILER_ARCHITECTURE_ID STREQUAL "x64")
Expand Down
Loading
Loading