Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions .github/workflows/linux_cuda_no_cudnn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Workflow: builds the CUDA execution provider on Linux without a cuDNN link
# and smoke-tests the resulting wheel on a GPU runner.
name: Linux CUDA No cuDNN CI

on:
  pull_request:
    branches:
      - main
      - "rel-*"
    # Restrict pull-request triggers to this workflow file, the CUDA provider
    # sources, and the CMake/Python packaging files listed below.
    paths:
      - ".github/workflows/linux_cuda_no_cudnn.yml"
      - "cmake/onnxruntime_providers_cuda.cmake"
      - "cmake/onnxruntime_python.cmake"
      - "onnxruntime/__init__.py"
      - "onnxruntime/core/providers/cuda/**"
  workflow_dispatch:

# Pull-request runs are grouped by ref, so a newer run cancels the one still in
# progress; every other event is grouped by commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

# Workflow-wide token permissions; individual jobs may narrow them.
permissions:
  contents: read
  packages: write
  attestations: write
  id-token: write

jobs:
# Build job: calls the repository's reusable Linux build workflow to produce a
# Release x64 CUDA 13.0 build (including a Python wheel). --cudnn_home is pointed
# at the CUDA toolkit directory itself. Tests are not run in this job; the build
# output is uploaded for downstream jobs.
build-linux-cuda-no-cudnn-x64-release:
  name: Build Linux CUDA x64 Release without cuDNN link
  uses: ./.github/workflows/reusable_linux_build.yml
  with:
    pool_name: onnxruntime-github-Ubuntu2204-AMD-CPU
    build_config: Release
    architecture: x64
    dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
    docker_build_args: "--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1"
    docker_image_repo: onnxruntimecuda13manylinuxbuild
    # Folded scalar: the lines below are joined with single spaces into one
    # flag string.
    extra_build_flags: >-
      --use_binskim_compliant_compile_flags --build_wheel --parallel
      --nvcc_threads 4 --flash_nvcc_threads 4
      --cuda_version=13.0 --cuda_home=/usr/local/cuda-13.0 --cudnn_home=/usr/local/cuda-13.0
      --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON
    python_path_prefix: "PATH=/opt/python/cp312-cp312/bin:$PATH"
    run_tests: false
    upload_build_output: true
    execution_providers: cuda
    job_identifier: build-linux-cuda-no-cudnn-x64-release
  secrets:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Smoke job: downloads the build output, checks that the CUDA provider library
# does not depend on cuDNN, then runs a one-node Add model on the CUDA EP with
# enable_cudnn=0 inside the build container on a GPU runner.
smoke-linux-cuda-no-cudnn-x64-release:
  name: Smoke Linux CUDA x64 Release without cuDNN runtime use
  needs: build-linux-cuda-no-cudnn-x64-release
  runs-on:
    - self-hosted
    - "1ES.Pool=onnxruntime-github-linux-a10"
    - "1ES.ImageOverride=onnxruntime-ubuntu2204-CUDA-A10-Test"
    - "JobId=smoke-linux-cuda-no-cudnn-x64-release-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
  permissions:
    contents: read
    packages: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v6

    # Builds (or reuses) the same CUDA 13 image the build job used; its full
    # image name is consumed by the docker run steps below.
    - uses: microsoft/onnxruntime-github-actions/build-docker-image@8bad63a3c05d448311dfa8e5f531171c97471aa1
      id: build_docker_image_step
      with:
        dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
        image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda13manylinuxbuild
        build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
        push: true
        azure-container-registry-name: onnxruntimebuildcache
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: Download Build Artifact
      uses: actions/download-artifact@v7
      with:
        name: build-output-x64-Release
        path: ${{ runner.temp }}/Release

    # Re-applies the executable bit to every existing file listed in perms.txt,
    # when that file is present in the downloaded artifact.
    - name: Restore Executable Permissions
      working-directory: ${{ runner.temp }}/Release
      run: |
        if [ -f perms.txt ]; then
          while IFS= read -r file; do
            if [ -f "$file" ]; then
              chmod +x "$file"
            fi
          done < perms.txt
        fi

    # The check is an explicit if/exit rather than a trailing "! grep":
    # negated commands are exempt from errexit, so the negated form would stop
    # failing the step as soon as another command was appended after it.
    - name: Verify CUDA provider has no direct cuDNN dependency
      run: |
        docker run --rm --gpus all \
          -v "${{ runner.temp }}/Release:/build/Release" \
          "${{ steps.build_docker_image_step.outputs.full-image-name }}" \
          bash -lc 'set -euo pipefail
        ldd /build/Release/Release/libonnxruntime_providers_cuda.so | tee /tmp/ldd.txt
        if grep -i cudnn /tmp/ldd.txt; then
          echo "libonnxruntime_providers_cuda.so must not depend on cuDNN" >&2
          exit 1
        fi'

    # numpy and onnx are installed WITH their dependencies (onnx needs protobuf
    # and friends to import); only the freshly built wheel is installed with
    # --no-deps. The Python heredoc body and its PY terminator must stay at the
    # base indentation of this block scalar.
    - name: Run no-cuDNN CUDA EP smoke test
      run: |
        docker run --rm --gpus all \
          -v "${{ runner.temp }}/Release:/build/Release" \
          "${{ steps.build_docker_image_step.outputs.full-image-name }}" \
          bash -lc 'set -e
        PATH=/opt/python/cp312-cp312/bin:$PATH
        LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:${LD_LIBRARY_PATH:-}
        export PATH LD_LIBRARY_PATH
        WHEEL_PATH=$(find /build/Release/Release/dist -type f -name "onnxruntime_gpu-*.whl" | head -n 1)
        if [ -z "$WHEEL_PATH" ]; then
          echo "No built onnxruntime GPU wheel found under /build/Release/Release/dist" >&2
          exit 1
        fi
        python -m pip install --no-cache-dir --force-reinstall numpy onnx
        echo "Installing $WHEEL_PATH"
        python -m pip install --no-cache-dir --force-reinstall --no-deps "$WHEEL_PATH"
        python - <<"PY"
        import numpy as np
        import onnx
        import onnxruntime as ort
        from onnx import TensorProto, helper

        x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [2, 3])
        y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [2, 3])
        node = helper.make_node("Add", ["x", "x"], ["y"])
        graph = helper.make_graph([node], "cuda_no_cudnn_smoke", [x], [y])
        model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 21)])
        model.ir_version = 10

        providers = [("CUDAExecutionProvider", {"enable_cudnn": "0"})]
        sess = ort.InferenceSession(model.SerializeToString(), providers=providers)
        data = np.arange(6, dtype=np.float32).reshape(2, 3)
        result = sess.run(None, {"x": data})[0]
        np.testing.assert_allclose(result, data + data)
        print("CUDA no-cuDNN smoke test passed")
        PY'
262 changes: 262 additions & 0 deletions .github/workflows/windows_cuda_no_cudnn.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
# Workflow: builds the CUDA plugin execution provider on Windows with cuDNN
# runtime DLLs removed, then tests it on a GPU runner.
name: Windows CUDA No cuDNN CI

on:
  pull_request:
    branches:
      - main
      - "rel-*"
    # Restrict pull-request triggers to this workflow file, the CUDA provider
    # and plugin sources/CMake, related docs, and the plugin EP Python test.
    paths:
      - ".github/workflows/windows_cuda_no_cudnn.yml"
      - "cmake/onnxruntime_providers_cuda.cmake"
      - "cmake/onnxruntime_providers_cuda_plugin.cmake"
      - "cmake/onnxruntime_python.cmake"
      - "docs/CUDA_cuDNN_Optional_Design.md"
      - "docs/cuda_plugin_ep/**"
      - "onnxruntime/__init__.py"
      - "onnxruntime/core/providers/cuda/**"
      - "onnxruntime/test/python/transformers/test_cuda_plugin_ep.py"
  workflow_dispatch:

# Pull-request runs are grouped by ref, so a newer run cancels the one still in
# progress; every other event is grouped by commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true

jobs:
# Build job: compiles ONNX Runtime (Release, x64) with the CUDA EP built as a
# plugin, using a cuDNN SDK whose runtime DLLs have been deleted, and uploads
# the trimmed build directory as an artifact.
build:
  name: Windows CUDA Plugin EP Build without cuDNN
  runs-on:
    - self-hosted
    - "1ES.Pool=onnxruntime-github-vs2022-latest"
    - "JobId=windows-cuda-plugin-no-cudnn-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
  # Job-level environment; CUDNN_FOLDER is read by the cuDNN download and build
  # steps, and the AZCOPY_* values are set for the azcopy downloads.
  env:
    DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
    setVcvars: true
    ALLOW_RELEASED_ONNX_OPSET_ONLY: "0"
    ONNXRUNTIME_TEST_GPU_DEVICE_ID: "0"
    AZCOPY_AUTO_LOGIN_TYPE: MSI
    AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
    CUDNN_FOLDER: "9.14.0.64_cuda13"
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        submodules: "none"

    - uses: actions/setup-python@v6
      with:
        python-version: "3.12"
        architecture: x64

    - name: Locate vcvarsall and Setup Env
      uses: ./.github/actions/locate-vcvarsall-and-setup-env
      with:
        architecture: x64

    - name: Install python modules
      shell: cmd
      working-directory: ${{ github.workspace }}
      run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt

    - name: Download CUDA SDK v13.0
      shell: pwsh
      working-directory: ${{ runner.temp }}
      run: |
        azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v13.0" .
        dir

    # Downloads the cuDNN SDK, deletes every cudnn*.dll under it, and fails if
    # any such DLL is still present afterwards.
    - name: Prepare cuDNN SDK without runtime DLLs
      shell: pwsh
      working-directory: ${{ runner.temp }}
      run: |
        $cudnnSdkUri = "https://lotusscus.blob.core.windows.net/models/cudnn_sdk/$env:CUDNN_FOLDER"
        azcopy.exe cp --recursive $cudnnSdkUri .
        $cudnnRoot = Join-Path $env:RUNNER_TEMP $env:CUDNN_FOLDER
        if (-not (Test-Path $cudnnRoot)) {
          Write-Error "cuDNN SDK was not downloaded to the expected folder: $cudnnRoot"
          exit 1
        }
        Get-ChildItem -Path $cudnnRoot -Recurse -Include "cudnn*.dll" | Remove-Item -Force
        if (Get-ChildItem -Path $cudnnRoot -Recurse -Include "cudnn*.dll" -ErrorAction SilentlyContinue) {
          Write-Error "cuDNN runtime DLLs must not be present in the no-cuDNN build environment"
          exit 1
        }

    # Only CUDA directories are appended to PATH for later steps.
    - name: Add CUDA to PATH
      shell: pwsh
      run: |
        Write-Host "Adding CUDA to PATH without adding any cuDNN directory"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\bin\x64"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\extras\CUPTI\lib64"

    # Copies the CUDA MSBuild extensions into the VS 2022 Enterprise
    # BuildCustomizations directory.
    - name: Install CUDA Visual Studio integration
      shell: pwsh
      run: |
        $sourceDir = "$env:RUNNER_TEMP\v13.0\extras\visual_studio_integration\MSBuildExtensions"
        $targetDir = "${env:ProgramFiles}\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations"
        Copy-Item -Path "$sourceDir\*" -Destination $targetDir -Force

    - name: Set OnnxRuntimeBuildDirectory
      shell: pwsh
      run: |
        $buildDir = Join-Path $env:RUNNER_TEMP "build"
        echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

    # Runs build.py, then removes intermediate build output to shrink the
    # artifact, and finally fails if any cudnn*.dll ended up in the output.
    - name: Build ONNX Runtime with CUDA Plugin EP and no cuDNN runtime path
      shell: pwsh
      working-directory: ${{ runner.temp }}
      run: |
        python.exe ${{ github.workspace }}\tools\ci_build\build.py `
          --update --build --config Release `
          --build_dir build `
          --skip_submodule_sync `
          --parallel `
          --nvcc_threads 4 `
          --flash_nvcc_threads 4 `
          --use_binskim_compliant_compile_flags `
          --cmake_generator "Visual Studio 17 2022" `
          --build_shared_lib `
          --build_wheel `
          --use_cuda `
          --cuda_home="$env:RUNNER_TEMP\v13.0" `
          --cudnn_home="$env:RUNNER_TEMP\$env:CUDNN_FOLDER" `
          --skip_tests `
          --use_vcpkg `
          --use_vcpkg_ms_internal_asset_cache `
          --enable_cuda_profiling `
          --cmake_extra_defines onnxruntime_QUICK_BUILD=ON `
          --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 `
          --cmake_extra_defines onnxruntime_BUILD_CUDA_EP_AS_PLUGIN=ON

        if ($lastExitCode -ne 0) {
          exit $lastExitCode
        }

        $outputDir = "${{ runner.temp }}\build\Release"
        Write-Host "Cleaning up files from $outputDir..."
        Remove-Item -Path "$outputDir\onnxruntime" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\pybind11" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\models" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\vcpkg_installed" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\_deps" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\CMakeCache.txt" -Force -ErrorAction SilentlyContinue
        Remove-Item -Path "$outputDir\CMakeFiles" -Recurse -Force -ErrorAction SilentlyContinue
        Remove-Item -Path $outputDir -Include "*.obj" -Recurse

        $cudnnArtifacts = Get-ChildItem -Path $outputDir -Recurse -Include "cudnn*.dll" -ErrorAction SilentlyContinue
        if ($cudnnArtifacts) {
          $cudnnArtifacts | ForEach-Object { Write-Host $_.FullName }
          Write-Error "cuDNN runtime DLLs must not be present in no-cuDNN build artifacts"
          exit 1
        }

    - name: Upload build artifacts
      uses: actions/upload-artifact@v6
      with:
        name: cuda-plugin-no-cudnn-build-artifacts
        path: ${{ runner.temp }}\build

# Test job: installs the wheel produced by the build job on a GPU runner,
# checks that the CUDA plugin DLL lists no cuDNN dependency, and runs the CUDA
# plugin EP Python tests with the no-cuDNN flag set.
test:
  name: Windows CUDA Plugin EP Test without cuDNN
  needs: build
  timeout-minutes: 120
  runs-on:
    - self-hosted
    - "1ES.Pool=onnxruntime-github-Win2022-GPU-A10"
    - "JobId=windows-cuda-plugin-no-cudnn-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
  # Job-level environment; no cuDNN folder is configured for this job.
  env:
    DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true
    setVcvars: true
    ALLOW_RELEASED_ONNX_OPSET_ONLY: "0"
    ONNXRUNTIME_TEST_GPU_DEVICE_ID: "0"
    AZCOPY_AUTO_LOGIN_TYPE: MSI
    AZCOPY_MSI_CLIENT_ID: d712a4c7-a0cf-4e87-af75-31510eba0a8e
  steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        submodules: "none"

    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: cuda-plugin-no-cudnn-build-artifacts
        path: ${{ runner.temp }}\build

    - uses: actions/setup-python@v6
      with:
        python-version: "3.12"
        architecture: x64

    - name: Locate vcvarsall and Setup Env
      uses: ./.github/actions/locate-vcvarsall-and-setup-env
      with:
        architecture: x64

    - name: Install python modules
      shell: cmd
      working-directory: ${{ github.workspace }}
      run: python -m pip install -r .\tools\ci_build\github\windows\python\requirements.txt

    # NOTE(review): this installs the default torch package from the configured
    # index; confirm that this is the CPU-only build on this runner.
    - name: Install torch for CPU only
      shell: cmd
      working-directory: ${{ github.workspace }}
      run: python -m pip install torch

    - name: Download CUDA SDK v13.0
      shell: pwsh
      working-directory: ${{ runner.temp }}
      run: |
        azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v13.0" .
        dir

    # Only CUDA directories are appended to PATH for later steps.
    - name: Add CUDA to PATH
      shell: pwsh
      run: |
        Write-Host "Adding CUDA to PATH without adding any cuDNN directory"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\bin"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\bin\x64"
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\v13.0\extras\CUPTI\lib64"

    - name: Set OnnxRuntimeBuildDirectory
      shell: pwsh
      run: |
        $buildDir = Join-Path $env:RUNNER_TEMP "build"
        echo "OnnxRuntimeBuildDirectory=$buildDir" >> $env:GITHUB_ENV

    - name: Install ONNX Runtime Wheel
      uses: ./.github/actions/install-onnxruntime-wheel
      with:
        whl-directory: ${{ runner.temp }}\build\Release\Release\dist

    - name: Verify GPU access
      shell: pwsh
      run: nvidia-smi

    # Fails when the plugin DLL is missing or when the dumpbin /dependents
    # listing mentions cudnn.
    - name: Verify CUDA plugin has no direct cuDNN dependency
      shell: pwsh
      run: |
        $pluginPath = "${{ runner.temp }}\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
        if (-not (Test-Path $pluginPath)) {
          Write-Error "CUDA plugin EP library not found at $pluginPath"
          exit 1
        }

        dumpbin /dependents $pluginPath | Tee-Object -FilePath $env:RUNNER_TEMP\cuda_plugin_dependents.txt
        if (Select-String -Path $env:RUNNER_TEMP\cuda_plugin_dependents.txt -Pattern "cudnn" -SimpleMatch -Quiet) {
          Write-Error "CUDA plugin EP has a direct cuDNN dependency"
          exit 1
        }

    # The three environment variables point the test script at the built plugin
    # DLL and switch on its plugin-EP and no-cuDNN modes.
    - name: Run CUDA Plugin EP Python Tests without cuDNN
      shell: pwsh
      working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
      run: |
        $env:ORT_CUDA_PLUGIN_PATH = "${{ runner.temp }}\build\Release\Release\onnxruntime_providers_cuda_plugin.dll"
        $env:ORT_TEST_CUDA_PLUGIN_EP = "1"
        $env:ORT_TEST_CUDA_PLUGIN_NO_CUDNN = "1"
        Write-Host "ORT_CUDA_PLUGIN_PATH=$env:ORT_CUDA_PLUGIN_PATH"
        python test_cuda_plugin_ep.py
        if ($lastExitCode -ne 0) {
          exit $lastExitCode
        }
Loading
Loading