diff --git a/.github/build.bat b/.github/build.bat index e4991a1a..d48f0957 100755 --- a/.github/build.bat +++ b/.github/build.bat @@ -38,15 +38,28 @@ if /I "%USE_CACHE%"=="true" ( ) ) +REM NOTE: nvcc is NOT wrapped with sccache on Windows. Unlike build.sh (Linux) -- where +REM sccache caches the per-arch .cu device passes -- sccache on Windows cannot parse the +REM nvcc command line (it dies with `sccache: error: Could not parse shell line` and +REM fails every .cu compile). So CUDA device code is built by nvcc directly (uncached) +REM here; the cl.exe C/C++ TUs still cache via the C/CXX launcher set above. + mkdir build cmake -Bbuild %LAUNCH% %* -if errorlevel 1 exit /b %ERRORLEVEL% +if errorlevel 1 exit /b 1 cmake --build build --config Release -if errorlevel 1 exit /b %ERRORLEVEL% +set "BUILD_RC=!ERRORLEVEL!" -REM Only query stats when sccache was actually wired in as the launcher; re-invoking -REM a rejected/crashing sccache here would just repeat its failure output. +REM Print cache stats (best-effort) regardless of build outcome -- only when sccache +REM was wired in as the launcher. if defined LAUNCH ( echo build.bat: sccache --show-stats sccache --show-stats ) + +REM Propagate a build failure as a non-zero exit (a prior bug let a failed `cmake +REM --build` reach here and exit 0, masquerading as a green build with no artifacts). +if not "!BUILD_RC!"=="0" ( + echo build.bat: cmake --build failed with exit code !BUILD_RC!. + exit /b !BUILD_RC! +) diff --git a/.github/build_opencl_android.sh b/.github/build_opencl_android.sh index efa3789c..491a59b5 100755 --- a/.github/build_opencl_android.sh +++ b/.github/build_opencl_android.sh @@ -20,9 +20,9 @@ HEADERS_DIR="$OPENCL_STAGE/OpenCL-Headers" LOADER_DIR="$OPENCL_STAGE/OpenCL-ICD-Loader" LOADER_BUILD="$LOADER_DIR/build" -# Pinned tags for reproducibility. -HEADERS_TAG=v2025.07.22 -LOADER_TAG=v2025.07.22 +# Pinned tags for reproducibility (OpenCL 3.1.1 spec release). 
+HEADERS_TAG=v2026.05.29 +LOADER_TAG=v2026.05.29 if [ ! -d "$HEADERS_DIR" ]; then mkdir -p "$OPENCL_STAGE" diff --git a/.github/build_opencl_windows.bat b/.github/build_opencl_windows.bat new file mode 100644 index 00000000..dbc9c5b4 --- /dev/null +++ b/.github/build_opencl_windows.bat @@ -0,0 +1,54 @@ +REM SPDX-FileCopyrightText: 2026 Bernard Ladenthin +REM +REM SPDX-License-Identifier: MIT +REM +REM Windows x86_64 build with the OpenCL backend enabled, shipped as the +REM `opencl-windows-x86-64` classifier. The windows-2025 runner image ships +REM neither OpenCL headers nor an OpenCL import library, so this script first +REM stages Khronos OpenCL-Headers and builds OpenCL-ICD-Loader (producing +REM OpenCL.lib) before delegating the jllama configure+build to build.bat with +REM the OpenCL paths. Mirrors build_opencl_android.sh. +REM +REM At runtime the GPU vendor's ICD (System32\OpenCL.dll, installed by the +REM NVIDIA/AMD/Intel driver) provides the actual OpenCL symbols; we link only +REM against the loader's import library, so no OpenCL.dll is shipped. + +@echo off +setlocal enabledelayedexpansion + +set "OPENCL_STAGE=%RUNNER_TEMP%\opencl-stage" +if "%RUNNER_TEMP%"=="" set "OPENCL_STAGE=%TEMP%\opencl-stage" +set "HEADERS_DIR=%OPENCL_STAGE%\OpenCL-Headers" +set "LOADER_DIR=%OPENCL_STAGE%\OpenCL-ICD-Loader" +set "LOADER_BUILD=%LOADER_DIR%\build" + +REM Pinned tags for reproducibility (OpenCL 3.1.1; match build_opencl_android.sh). 
+set "HEADERS_TAG=v2026.05.29" +set "LOADER_TAG=v2026.05.29" + +if not exist "%HEADERS_DIR%" ( + git clone --depth 1 --branch %HEADERS_TAG% https://github.com/KhronosGroup/OpenCL-Headers.git "%HEADERS_DIR%" + if errorlevel 1 exit /b 1 +) + +if not exist "%LOADER_BUILD%\Release\OpenCL.lib" if not exist "%LOADER_BUILD%\OpenCL.lib" ( + if not exist "%LOADER_DIR%" ( + git clone --depth 1 --branch %LOADER_TAG% https://github.com/KhronosGroup/OpenCL-ICD-Loader.git "%LOADER_DIR%" + if errorlevel 1 exit /b 1 + ) + cmake -B "%LOADER_BUILD%" -S "%LOADER_DIR%" -DOPENCL_ICD_LOADER_HEADERS_DIR="%HEADERS_DIR%" -DBUILD_TESTING=OFF + if errorlevel 1 exit /b 1 + cmake --build "%LOADER_BUILD%" --config Release + if errorlevel 1 exit /b 1 +) + +REM Resolve the import library: multi-config generators emit build\Release\OpenCL.lib, +REM single-config ones emit build\OpenCL.lib. +set "OPENCL_LIB=%LOADER_BUILD%\Release\OpenCL.lib" +if not exist "%OPENCL_LIB%" set "OPENCL_LIB=%LOADER_BUILD%\OpenCL.lib" + +REM Delegate to build.bat so the jllama build inherits the sccache probe + Depot +REM cache launcher and --show-stats output. The OpenCL paths satisfy ggml's +REM find_package(OpenCL); the caller appends -G/-DGGML_OPENCL/-DOS_* via %*. 
+call .github\build.bat -DOpenCL_INCLUDE_DIR="%HEADERS_DIR%" -DOpenCL_LIBRARY="%OPENCL_LIB%" %* +exit /b %ERRORLEVEL% diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c2d82f36..eefed904 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -476,8 +476,8 @@ jobs: name: macos-14-libraries path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ - build-windows-x86_64: - name: Build and Test Windows 2025 x86_64 (VS 2026) + build-windows-x86_64-msvc: + name: Build and Test Windows 2025 x86_64 (MSVC / VS 2026, classifier) needs: [startgate, build-webui] runs-on: windows-2025-vs2026 steps: @@ -507,11 +507,11 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@v7 with: - name: Windows-x86_64-libraries + name: Windows-x86_64-msvc path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ - build-windows-x86: - name: Build and Test Windows 2025 x86 (VS 2026) + build-windows-x86-msvc: + name: Build and Test Windows 2025 x86 (MSVC / VS 2026, classifier) needs: [startgate, build-webui] runs-on: windows-2025-vs2026 steps: @@ -541,25 +541,25 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@v7 with: - name: Windows-x86-libraries + name: Windows-x86-msvc path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ # --------------------------------------------------------------------------- - # Windows Ninja Multi-Config + sccache — EVALUATION jobs (not yet released). - # The Visual Studio generator ignores CMAKE_{C,CXX}_COMPILER_LAUNCHER, so the two - # build-windows-* jobs above are the only uncached native builds. Upstream - # llama.cpp ships its windows-cuda artifact with "Ninja Multi-Config" + MSVC, - # which proves the combination works on the same tree. These two jobs run that - # combination in parallel with the trusted VS jobs and front cl.exe with sccache - # over Depot WebDAV (build.bat probe-guards it). 
Artifacts are named - # `Windows-*-ninja` (NOT `*-libraries`) so the package job's `pattern: "*-libraries"` - # does NOT pick them up — they are evaluation-only until cache hits are confirmed, - # at which point the release path is switched over (see TODO.md). The package job's - # `needs:` is intentionally left unchanged. + # Windows Ninja Multi-Config + sccache — the DEFAULT Windows CPU natives. + # The Visual Studio generator ignores CMAKE_{C,CXX}_COMPILER_LAUNCHER, so only the + # Ninja Multi-Config generator can front cl.exe with sccache over Depot WebDAV + # (build.bat probe-guards it). Both generators use the same MSVC toolchain (cl.exe, + # static /MT CRT) on the same runner, so the produced jllama.dll/llama.dll/ggml.dll + # are functionally equivalent with identical runtime dependencies — the only delta + # is build-system plumbing + caching. The Ninja build is therefore the default JAR + # (artifacts `Windows-*-libraries`, picked up by the package job's `pattern: + # "*-libraries"`); the MSVC build above is shipped as the `msvc-windows` classifier + # for anyone who wants the Visual-Studio-generator natives. Upstream llama.cpp also + # builds its Windows artifacts with Ninja Multi-Config + MSVC. 
# --------------------------------------------------------------------------- - build-windows-x86_64-ninja: - name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, eval) + build-windows-x86_64: + name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, default) needs: [startgate, build-webui] runs-on: windows-2025-vs2026 env: @@ -609,11 +609,11 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@v7 with: - name: Windows-x86_64-ninja + name: Windows-x86_64-libraries path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ - build-windows-x86-ninja: - name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, eval) + build-windows-x86: + name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, default) needs: [startgate, build-webui] runs-on: windows-2025-vs2026 env: @@ -663,9 +663,172 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@v7 with: - name: Windows-x86-ninja + name: Windows-x86-libraries path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + # --------------------------------------------------------------------------- + # Windows GPU classifiers (x86_64 only) — CUDA, Vulkan, OpenCL. + # All three use the same Ninja Multi-Config + MSVC + sccache toolchain as the + # default CPU build; they differ only by the GGML backend flag (and the build-time + # SDK each needs). CMakeLists.txt routes each backend's output to its own + # src/main/resources_windows_{cuda,vulkan,opencl}/ tree, which the matching Maven + # profile (cuda-windows / vulkan-windows / opencl-windows) turns into a classifier + # JAR. GPU runtime libraries are NOT bundled — the consumer's GPU driver / toolkit + # provides them (CUDA: cudart64_13/cublas64_13 from the CUDA Toolkit; Vulkan: + # vulkan-1.dll from the driver; OpenCL: System32\OpenCL.dll from the driver). 
+ # NOTE: GitHub-hosted Windows runners have NO GPU, so these jobs only build the + # artifact (no -DBUILD_TESTING / ctest; the CPU-only C++ unit suite runs in the CPU jobs); + # model-backed GPU inference and end-to-end GPU validation are local / self-hosted. + # --------------------------------------------------------------------------- + + build-windows-x86_64-cuda: + name: Build Windows 2025 x86_64 CUDA (Ninja + sccache) + needs: [startgate, build-webui] + runs-on: windows-2025-vs2026 + env: + USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }} + SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev + SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }} + steps: + - uses: actions/checkout@v7 + - name: Download shared WebUI assets + uses: actions/download-artifact@v8 + with: + name: webui-generated + path: ${{ github.workspace }}/webui-generated/ + - name: Set up MSVC developer environment (x64) + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + - name: Install CUDA Toolkit + # Full toolkit install (default method: local, no sub-packages restriction). + # A reduced network sub-package set ("nvcc","cudart","cublas",…) omitted the + # nvcc crt headers (crt/host_config.h), so cmake's CUDA compiler detection + # failed at configure. The full installer ships every header reliably. 
+ uses: Jimver/cuda-toolkit@v0.2.35 + id: cuda-toolkit + with: + cuda: '13.2.0' + - name: Install sccache (shared compiler cache) + if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' + continue-on-error: true + shell: pwsh + run: | + $ver = "0.16.0" + $rel = "sccache-v$ver-x86_64-pc-windows-msvc" + $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip" + Write-Host "Downloading $url" + Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip" + Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force + Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel" + - name: Build libraries + shell: cmd + # GPU jobs build the artifact only — no -DBUILD_TESTING / ctest. The C++ unit + # suite is CPU-only and fully covered by the `C++ Tests` job + the CPU Windows + # jobs; a GPU-linked jllama_test.exe cannot be discovered/run on a GPU-less + # GitHub runner (it errors probing for a CUDA device -> ctest *_NOT_BUILT). 
+ run: | + .github\build.bat -G "Ninja Multi-Config" -DGGML_CUDA=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 + - name: Upload artifacts + uses: actions/upload-artifact@v7 + with: + name: Windows-x86_64-cuda + path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + if-no-files-found: error + + build-windows-x86_64-vulkan: + name: Build Windows 2025 x86_64 Vulkan (Ninja + sccache) + needs: [startgate, build-webui] + runs-on: windows-2025-vs2026 + env: + USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }} + SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev + SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }} + steps: + - uses: actions/checkout@v7 + - name: Download shared WebUI assets + uses: actions/download-artifact@v8 + with: + name: webui-generated + path: ${{ github.workspace }}/webui-generated/ + - name: Set up MSVC developer environment (x64) + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + - name: Install Vulkan SDK + uses: jakoch/install-vulkan-sdk-action@v1.6.0 + with: + vulkan_version: 1.4.350.0 + cache: true + - name: Install sccache (shared compiler cache) + if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' + continue-on-error: true + shell: pwsh + run: | + $ver = "0.16.0" + $rel = "sccache-v$ver-x86_64-pc-windows-msvc" + $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip" + Write-Host "Downloading $url" + Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip" + Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force + Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel" + - name: Build libraries + shell: cmd + # Build the artifact only (see the CUDA job's note: GPU-less runner can't run a + # GPU-linked jllama_test; the C++ unit suite is covered by the CPU jobs). 
+ run: | + .github\build.bat -G "Ninja Multi-Config" -DGGML_VULKAN=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 + - name: Upload artifacts + uses: actions/upload-artifact@v7 + with: + name: Windows-x86_64-vulkan + path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + if-no-files-found: error + + build-windows-x86_64-opencl: + name: Build Windows 2025 x86_64 OpenCL (Ninja + sccache) + needs: [startgate, build-webui] + runs-on: windows-2025-vs2026 + env: + USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }} + SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev + SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }} + steps: + - uses: actions/checkout@v7 + - name: Download shared WebUI assets + uses: actions/download-artifact@v8 + with: + name: webui-generated + path: ${{ github.workspace }}/webui-generated/ + - name: Set up MSVC developer environment (x64) + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + - name: Install sccache (shared compiler cache) + if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' + continue-on-error: true + shell: pwsh + run: | + $ver = "0.16.0" + $rel = "sccache-v$ver-x86_64-pc-windows-msvc" + $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip" + Write-Host "Downloading $url" + Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip" + Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force + Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel" + - name: Build libraries + shell: cmd + # Build the artifact only (see the CUDA job's note: GPU-less runner can't run a + # GPU-linked jllama_test; the C++ unit suite is covered by the CPU jobs). 
+ run: | + .github\build_opencl_windows.bat -G "Ninja Multi-Config" -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 + - name: Upload artifacts + uses: actions/upload-artifact@v7 + with: + name: Windows-x86_64-opencl + path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ + if-no-files-found: error + # --------------------------------------------------------------------------- # CI-only jobs — no release artifact, purely for test coverage # --------------------------------------------------------------------------- @@ -1154,7 +1317,7 @@ jobs: if-no-files-found: warn test-java-windows-x86_64: - name: Java Tests Windows 2025 x86_64 (VS 2026) + name: Java Tests Windows 2025 x86_64 (default / Ninja) needs: build-windows-x86_64 runs-on: windows-2025-vs2026 steps: @@ -1258,13 +1421,13 @@ jobs: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/* if-no-files-found: warn - # Java/inference validation of the Ninja-built x86_64 DLL (the analogue of - # test-java-windows-x86_64 for the MSVC build). Loads the Ninja jllama.dll via - # JNI and runs the full model-backed suite, so both Windows generators are - # validated end-to-end before the `ninja-windows` classifier JAR ships. - test-java-windows-x86_64-ninja: - name: Java Tests Windows 2025 x86_64 (Ninja, eval) - needs: build-windows-x86_64-ninja + # Java/inference validation of the MSVC-built x86_64 DLL (the analogue of + # test-java-windows-x86_64 for the default Ninja build). Loads the MSVC jllama.dll + # via JNI and runs the full model-backed suite, so both Windows generators are + # validated end-to-end before the `msvc-windows` classifier JAR ships. 
+ test-java-windows-x86_64-msvc: + name: Java Tests Windows 2025 x86_64 (MSVC classifier) + needs: build-windows-x86_64-msvc runs-on: windows-2025-vs2026 steps: - uses: actions/checkout@v7 @@ -1281,7 +1444,7 @@ jobs: Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available" - uses: actions/download-artifact@v8 with: - name: Windows-x86_64-ninja + name: Windows-x86_64-msvc path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace) uses: actions/cache@v5 @@ -1355,7 +1518,7 @@ jobs: - if: failure() uses: actions/upload-artifact@v7 with: - name: windows-output-ninja + name: windows-output-msvc path: | ${{ github.workspace }}\hs_err_pid*.log ${{ github.workspace }}\*.hprof @@ -1378,9 +1541,13 @@ jobs: - crosscompile-linux-aarch64 - crosscompile-android-aarch64 - crosscompile-android-aarch64-opencl + - build-windows-x86_64 - build-windows-x86 - - build-windows-x86_64-ninja - - build-windows-x86-ninja + - build-windows-x86_64-msvc + - build-windows-x86-msvc + - build-windows-x86_64-cuda + - build-windows-x86_64-vulkan + - build-windows-x86_64-opencl - test-cpp-linux-x86_64 - build-macos-arm64-metal-15 - test-java-linux-x86_64 @@ -1388,7 +1555,7 @@ jobs: - test-java-macos-arm64-no-metal - test-java-macos-arm64-metal-15 - test-java-windows-x86_64 - - test-java-windows-x86_64-ninja + - test-java-windows-x86_64-msvc runs-on: ubuntu-latest steps: - uses: actions/checkout@v7 @@ -1405,17 +1572,29 @@ jobs: with: name: android-libraries-opencl path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - # Ninja-built Windows natives -> separate tree consumed by the `windows-ninja` - # Maven profile (the `ninja-windows` classifier JAR). The default JAR keeps the - # MSVC `*-libraries` natives downloaded above. + # MSVC-built Windows natives -> `msvc-windows` classifier tree. 
The default JAR + # now ships the Ninja `*-libraries` natives merged above (default flip). - uses: actions/download-artifact@v8 with: - name: Windows-x86_64-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86_64-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: - name: Windows-x86-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + # Windows GPU classifiers (x86_64 only) -> one tree each. + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-cuda + path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-vulkan + path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-opencl + path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ - uses: actions/setup-java@v5 with: distribution: 'temurin' @@ -1426,8 +1605,10 @@ jobs: # default-platform native libs in one drop-on-classpath JAR, runnable via its # OpenAiCompatServer Main-Class). It lands in target/ and is uploaded in the `llama-jars` # artifact below - a CI run artifact only, not a Maven Central / GitHub-Release asset. - # `windows-ninja` attaches the `ninja-windows` classifier JAR (Ninja-built Windows natives). - run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja,assembly -Dmaven.test.skip=true -Dgpg.skip=true package + # Windows classifier JARs: `windows-msvc` (MSVC-built CPU natives) plus the GPU + # backends `cuda-windows` / `vulkan-windows` / `opencl-windows`. 
The default JAR's + # Windows natives are the Ninja `*-libraries` merged into src/main/resources/ above. + run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows,assembly -Dmaven.test.skip=true -Dgpg.skip=true package - name: Upload JARs uses: actions/upload-artifact@v7 with: @@ -1507,12 +1688,24 @@ jobs: path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: - name: Windows-x86_64-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86_64-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-cuda + path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: - name: Windows-x86-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86_64-vulkan + path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-opencl + path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository uses: actions/setup-java@v5 with: @@ -1533,7 +1726,7 @@ jobs: *) echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. 
Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."; exit 1 ;; esac - name: Publish snapshot - run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy + run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows -Dmaven.test.skip=true deploy env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} @@ -1599,12 +1792,24 @@ jobs: path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: - name: Windows-x86_64-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86_64-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86-msvc + path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-cuda + path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/ + - uses: actions/download-artifact@v8 + with: + name: Windows-x86_64-vulkan + path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/ - uses: actions/download-artifact@v8 with: - name: Windows-x86-ninja - path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/ + name: Windows-x86_64-opencl + path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository uses: actions/setup-java@v5 with: @@ -1616,7 +1821,7 @@ jobs: gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }} gpg-passphrase: MAVEN_GPG_PASSPHRASE - name: Publish release - run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy + run: mvn 
--batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows -Dmaven.test.skip=true deploy env: MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }} diff --git a/.gitignore b/.gitignore index 8aabd814..be02ca4b 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,12 @@ replay_pid* models/*.gguf src/main/cpp/net_ladenthin_llama_*.h src/main/resources_cuda_linux/ -src/main/resources_windows_ninja/ +# Per-classifier native trees, staged by CI before the matching Maven profile runs, +# never committed (same policy as the default-tree native libs below). +src/main/resources_windows_msvc/ +src/main/resources_windows_cuda/ +src/main/resources_windows_vulkan/ +src/main/resources_windows_opencl/ src/main/resources/**/*.so src/main/resources/**/*.dylib src/main/resources/**/*.dll diff --git a/CLAUDE.md b/CLAUDE.md index 1c2094d8..03743d91 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -163,49 +163,94 @@ At runtime the device must provide its own OpenCL ICD (`libOpenCL.so`); Qualcomm Adreno drivers do. Devices without an ICD should use the default CPU-only Android JAR. -## Windows Ninja artifact (sccache-cached, parallel to the MSVC build) - -The Visual Studio generator ignores `CMAKE_{C,CXX}_COMPILER_LAUNCHER`, so the two MSVC Windows -jobs (`build-windows-x86_64`, `build-windows-x86`) **cannot** use the sccache/Depot cache. Rather -than switch the trusted MSVC build, the repo builds the **same CPU natives a second time** with the -**`Ninja Multi-Config`** generator (which *does* honor the launcher) and ships them as a separate -**`ninja-windows`** Maven classifier JAR. **The MSVC build is the default JAR and is kept -permanently** — the Ninja artifact is an additional, cache-accelerated, independently -end-to-end-tested option, not a replacement. 
(Upstream llama.cpp ships its `windows-cuda` artifact -with Ninja Multi-Config + MSVC, proving the combination works on the same tree.) - -Unlike the CUDA / OpenCL classifiers — which differ by a **GGML backend flag** and route their -output in `CMakeLists.txt` — the Ninja Windows build differs only by **generator/toolchain**, so -there is **no `CMakeLists.txt` change**: both generators emit to the canonical -`src/main/resources/.../Windows/{x86_64,x86}/`. Routing to the classifier tree happens purely at the -CI-download + pom-profile level. Four places wire it together: - -1. **`.github/build.bat`** — sccache probe guard mirroring `build.sh`'s `sccache_can_wrap_compiler()`: - when `USE_CACHE=true` and `sccache` is on PATH, it compiles a trivial TU through `sccache cl.exe`; - only on success does it pass `-DCMAKE_{C,CXX}_COMPILER_LAUNCHER=sccache` and print - `sccache --show-stats`. A missing/crashing sccache falls back to a green uncached build. The MSVC - jobs do not set `USE_CACHE`, so the guard is inert for them. -2. **`.github/workflows/publish.yml`** — build jobs `build-windows-x86_64-ninja` / - `build-windows-x86-ninja` (`windows-2025-vs2026`, `ilammy/msvc-dev-cmd@v1` for the arch env, - sccache v0.16.0 from the GitHub release **zip** + Depot WebDAV, `build.bat -G "Ninja Multi-Config"`), - uploading artifacts `Windows-{x86_64,x86}-ninja` (**not** `*-libraries`, so the `package` job's - `pattern: "*-libraries"` ignores them). `test-java-windows-x86_64-ninja` loads the Ninja DLL via - JNI and runs the full model-backed suite. The `package`, `publish-snapshot`, and `publish-release` - jobs download `Windows-*-ninja` into `src/main/resources_windows_ninja/` and activate the - `windows-ninja` Maven profile. -3. 
**`pom.xml`** — the `windows-ninja` profile produces a second JAR with `ninja-windows` - from the `${project.build.outputDirectory}_windows_ninja` tree (separate compile pass + resource - copy + classified jar; mirrors the `cuda` / `opencl-android` profiles). Activated only in CI. -4. **`README.md`** — the `ninja-windows` row + dependency snippet in "Choosing the right classifier". - -`src/main/resources_windows_ninja/` is git-ignored (staged by CI, never committed — same policy as -the native libs and the CUDA/OpenCL trees). - -**Local sanity build** (needs MSVC + a Ninja on PATH; sccache optional): +## Windows native classifiers (default Ninja CPU + MSVC classifier + CUDA/Vulkan/OpenCL GPU) + +The Windows native libraries ship in **five** forms. The **default JAR's** Windows natives are now +built with the **`Ninja Multi-Config`** generator (the *default flip*); the Visual Studio / MSVC +build is shipped as the **`msvc-windows`** classifier; and three GPU backends ship as +**`cuda13-windows-x86-64`**, **`vulkan-windows-x86-64`**, and **`opencl-windows-x86-64`** (all +**x86_64 only**, all Ninja). + +**Why Ninja is the default (the flip).** The Visual Studio generator ignores +`CMAKE_{C,CXX}_COMPILER_LAUNCHER`, so only Ninja Multi-Config can front `cl.exe` with sccache over +Depot WebDAV. **Both generators use the same MSVC toolchain** (`cl.exe`, static `/MT` CRT via +`CMAKE_MSVC_RUNTIME_LIBRARY`, same Release flags, same runner), so the produced +`jllama.dll`/`llama.dll`/`ggml.dll` are **functionally equivalent with identical runtime +dependencies** — the only difference is build-system plumbing + caching. Making Ninja the default +gives the most-pulled JAR the sccache cache; MSVC stays available as a classifier for anyone who +wants the Visual-Studio-generator build. (Upstream llama.cpp also builds its Windows artifacts with +Ninja Multi-Config + MSVC.) 
Both Windows CPU builds are validated end-to-end with the full +model-backed Java suite (`test-java-windows-x86_64` = default/Ninja, `test-java-windows-x86_64-msvc` += MSVC classifier). + +**GPU runtime libraries are NOT bundled.** The GPU JARs ship only `jllama.dll`/`llama.dll`/`ggml.dll` +(plus the embedded backend). The consumer's driver/toolkit must supply the runtime: CUDA needs the +installed CUDA 13 Toolkit (`cudart64_13.dll`/`cublas64_13.dll`/`cublasLt64_13.dll` on `PATH`); Vulkan +needs `vulkan-1.dll` (ships with current GPU drivers); OpenCL needs the vendor ICD +(`System32\OpenCL.dll`). Not bundling = no NVIDIA-EULA redistribution obligation. **GitHub-hosted +Windows runners have NO GPU**, so the GPU jobs **build the artifact only** (no `-DBUILD_TESTING`/`ctest`) +— a GPU-linked `jllama_test.exe` can't even be enumerated on a GPU-less runner (it errors probing for a +device, so `gtest_discover_tests` registers a failing `*_NOT_BUILT` sentinel). The CPU-only C++ unit +suite is fully covered by the `C++ Tests` job + the CPU Windows jobs; model-backed GPU inference is +local / self-hosted. + +Wiring (mirrors the CUDA-Linux / OpenCL-Android classifier pattern): + +1. **`CMakeLists.txt`** — the `if(GGML_CUDA) … elseif(GGML_VULKAN) … elseif(GGML_OPENCL) … else()` + chain is **OS-aware**: CUDA → `resources_windows_cuda` on Windows (else `resources_linux_cuda`), + Vulkan → `resources_windows_vulkan`, OpenCL → `resources_windows_opencl` on Windows (else + `resources_android_opencl`). The default CPU build (both generators) still emits to the canonical + `src/main/resources/.../Windows/{x86_64,x86}/`, so the Ninja-vs-MSVC split is purely a + CI-artifact-name + pom-profile concern (no CMake change for it). +2. **`.github/build.bat`** — the sccache probe guard (mirrors `build.sh`) wraps the **cl.exe** C/C++ TUs + only. 
Unlike `build.sh` (Linux), it does **not** wrap `nvcc`: sccache on Windows can't parse the nvcc + command line (`sccache: error: Could not parse shell line`) and fails every `.cu` compile, so CUDA + device code builds with nvcc directly (uncached). `build.bat` also propagates a `cmake --build` + failure as a non-zero exit (a prior bug let a failed CUDA build exit 0 → empty artifact → late + `package` failure); the GPU upload steps additionally use `if-no-files-found: error` as a backstop. +3. **`.github/build_opencl_windows.bat`** — stages Khronos OpenCL-Headers + builds OpenCL-ICD-Loader + (`OpenCL.lib`), then delegates to `build.bat` with `-DOpenCL_INCLUDE_DIR`/`-DOpenCL_LIBRARY` + (the Windows analogue of `build_opencl_android.sh`). +4. **`.github/workflows/publish.yml`** — build jobs (all `windows-2025-vs2026`, `ilammy/msvc-dev-cmd@v1`, + sccache v0.16.0 zip + Depot WebDAV): + - `build-windows-x86_64` / `build-windows-x86` — **Ninja CPU**, artifacts `Windows-{arch}-libraries` + → picked up by the `package` job's `pattern: "*-libraries"` into the **default** tree. + - `build-windows-x86_64-msvc` / `build-windows-x86-msvc` — **MSVC CPU**, artifacts `Windows-{arch}-msvc`. + - `build-windows-x86_64-cuda` — `Jimver/cuda-toolkit@v0.2.35` (CUDA `13.2.0`) + `-DGGML_CUDA=ON`, + artifact `Windows-x86_64-cuda`. + - `build-windows-x86_64-vulkan` — `jakoch/install-vulkan-sdk-action` + `-DGGML_VULKAN=ON`, artifact + `Windows-x86_64-vulkan`. + - `build-windows-x86_64-opencl` — `build_opencl_windows.bat -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON`, + artifact `Windows-x86_64-opencl`. + The `package`, `publish-snapshot`, and `publish-release` jobs download each non-default artifact into + its `src/main/resources_windows_{msvc,cuda,vulkan,opencl}/` tree and activate the + `windows-msvc,cuda-windows,vulkan-windows,opencl-windows` Maven profiles. +5. 
**`pom.xml`** — profiles `windows-msvc` / `cuda-windows` / `vulkan-windows` / `opencl-windows`, + each a separate compile pass + resource copy + classified jar (classifiers `msvc-windows` / + `cuda13-windows-x86-64` / `vulkan-windows-x86-64` / `opencl-windows-x86-64`). Activated only in CI. +6. **`README.md`** — the classifier table + dependency snippets in "Choosing the right classifier". + +`src/main/resources_windows_{msvc,cuda,vulkan,opencl}/` are git-ignored (staged by CI, never committed). + +**First CI run (PR #276, run 28327740376):** the default Ninja CPU flip, the MSVC classifier, and the +**OpenCL** job were green on the first try. Two GPU jobs needed a toolchain fix: **CUDA** failed with +`Version not available: 13.0.0` because the pinned `Jimver/cuda-toolkit@v0.2.24` predated CUDA 13.x → +bumped to `@v0.2.35` + `13.2.0` (matches the Linux pin, classifier stays `cuda13-…`); **Vulkan** failed +`find_package(Vulkan)` because `humbletim/install-vulkan-sdk` set `VULKAN_SDK` but laid the SDK out in a +way CMake's `FindVulkan` couldn't read → switched to `jakoch/install-vulkan-sdk-action` (purpose-built, +FindVulkan-compatible). Because all five Windows build jobs are in the `package`/publish `needs:` graph, a +GPU-toolchain failure blocks packaging — the same release-gating policy the Linux-CUDA / Android-OpenCL +jobs already follow. 
+ +**Local sanity builds** (need MSVC + Ninja on PATH; sccache optional; GPU builds also need the matching SDK): ```bat mvn -q compile .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON ctest --test-dir build --output-on-failure +:: GPU (needs the matching SDK installed + on PATH): +.github\build.bat -G "Ninja Multi-Config" -DGGML_CUDA=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 +.github\build.bat -G "Ninja Multi-Config" -DGGML_VULKAN=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 +.github\build_opencl_windows.bat -G "Ninja Multi-Config" -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DOS_NAME=Windows -DOS_ARCH=x86_64 ``` ## WebUI (llama.cpp Svelte UI) embedding @@ -348,9 +393,10 @@ dockcross wrapper only forwards host env it is explicitly told to via `-e`. The version is the `SCCACHE_DL_VERSION` knob in `build.sh` (default **0.16.0**; overridable per-job to try a different build against a container that crashed another). **Windows** is handled separately (the Visual Studio generator ignores `CMAKE_*_COMPILER_LAUNCHER`): see -"Windows Ninja artifact" below — the cached path uses the **Ninja Multi-Config** generator with a -`build.bat` sccache probe and a direct sccache zip download (not `mozilla-actions/sccache-action`), -shipped as a parallel `ninja-windows` classifier JAR while the MSVC default stays the trusted build. +"Windows native classifiers" below — the **default** Windows CPU JAR now uses the **Ninja +Multi-Config** generator (so it caches) with a `build.bat` sccache probe and a direct sccache zip +download (not `mozilla-actions/sccache-action`); the uncached MSVC build ships as the `msvc-windows` +classifier, and the three Windows GPU classifiers (CUDA/Vulkan/OpenCL) use the same Ninja path. **Cross-repo scope.** This Depot/sccache compiler cache makes sense only for java-llama.cpp — it is the only sibling repo with a native (C++/JNI) build. 
It does not apply to the pure-Maven diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e3e7846..ebba2833 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -243,12 +243,33 @@ if(NOT OS_ARCH) message(FATAL_ERROR "Could not determine CPU architecture") endif() +# Backend + OS routing for the native-library output tree. Each GPU backend ships +# under its own Maven classifier, so it must land in a backend-specific resource +# root (the default CPU tree stays src/main/resources/). The GPU branches are +# OS-aware because the same GGML flag is used on more than one platform: +# - GGML_CUDA -> Linux (resources_linux_cuda) AND Windows (resources_windows_cuda) +# - GGML_OPENCL -> Android (resources_android_opencl) AND Windows (resources_windows_opencl) +# - GGML_VULKAN -> Windows only (resources_windows_vulkan) +# The classifier->tree mapping is mirrored by the matching Maven profile in pom.xml. if(GGML_CUDA) - set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_linux_cuda/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) - message(STATUS "GPU (CUDA Linux) build - Installing files to ${JLLAMA_DIR}") + if(OS_NAME STREQUAL "Windows") + set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_windows_cuda/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) + message(STATUS "GPU (CUDA Windows) build - Installing files to ${JLLAMA_DIR}") + else() + set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_linux_cuda/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) + message(STATUS "GPU (CUDA Linux) build - Installing files to ${JLLAMA_DIR}") + endif() +elseif(GGML_VULKAN) + set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_windows_vulkan/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) + message(STATUS "GPU (Vulkan) build - Installing files to ${JLLAMA_DIR}") elseif(GGML_OPENCL) - set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_android_opencl/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) - message(STATUS "GPU (OpenCL Android) build - Installing files to ${JLLAMA_DIR}") + if(OS_NAME STREQUAL "Windows") 
+ set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_windows_opencl/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) + message(STATUS "GPU (OpenCL Windows) build - Installing files to ${JLLAMA_DIR}") + else() + set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources_android_opencl/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) + message(STATUS "GPU (OpenCL Android) build - Installing files to ${JLLAMA_DIR}") + endif() else() set(JLLAMA_DIR ${CMAKE_SOURCE_DIR}/src/main/resources/net/ladenthin/llama/${OS_NAME}/${OS_ARCH}) message(STATUS "CPU build - Installing files to ${JLLAMA_DIR}") diff --git a/README.md b/README.md index 86b51236..a46d5f0e 100644 --- a/README.md +++ b/README.md @@ -162,17 +162,22 @@ If any of these match your platform, you can include the Maven dependency and ge ### Choosing the right classifier -The Maven coordinate `net.ladenthin:llama` publishes one default JAR (CPU-only) -plus optional JARs selected via a Maven `<classifier>`: two GPU/accelerator -builds and one alternate-toolchain Windows build. Pick at most one GPU/accelerator -classifier — those are mutually exclusive — and optionally the Windows build. +The Maven coordinate `net.ladenthin:llama` publishes one default JAR (CPU-only; +its Windows natives are built with the Ninja Multi-Config + MSVC toolchain) plus +optional JARs selected via a Maven `<classifier>`: three Windows GPU builds +(CUDA / Vulkan / OpenCL), the Linux CUDA and Android OpenCL builds, and an +alternate-toolchain MSVC Windows CPU build. Pick at most one GPU/accelerator +classifier — those are mutually exclusive — and optionally a CPU Windows build. | Classifier | Backend | Target platform | Runtime requirement | |---|---|---|---| -| _(none)_ | CPU | Linux x86-64 / aarch64, macOS x86-64 / aarch64, Windows x86-64 (MSVC / Visual Studio generator), Android aarch64 (CPU) | A JDK 8+ JVM. **Linux `aarch64` additionally requires glibc ≥ 2.39** (e.g.
Ubuntu 24.04+, Debian 13+) — it is built natively on `ubuntu-24.04-arm`, matching upstream llama.cpp's own ARM binaries; older-glibc ARM hosts (Ubuntu 22.04, Debian 12, RHEL 8/9, Amazon Linux 2023) are not supported. Linux x86-64 keeps a glibc 2.17 floor (manylinux2014). | +| _(none)_ | CPU | Linux x86-64 / aarch64, macOS x86-64 / aarch64, Windows x86-64 / x86 (Ninja Multi-Config + MSVC), Android aarch64 (CPU) | A JDK 8+ JVM. **Linux `aarch64` additionally requires glibc ≥ 2.39** (e.g. Ubuntu 24.04+, Debian 13+) — it is built natively on `ubuntu-24.04-arm`, matching upstream llama.cpp's own ARM binaries; older-glibc ARM hosts (Ubuntu 22.04, Debian 12, RHEL 8/9, Amazon Linux 2023) are not supported. Linux x86-64 keeps a glibc 2.17 floor (manylinux2014). | +| `msvc-windows` | CPU (MSVC / Visual Studio generator) | Windows x86-64 and x86 | None beyond a JDK 8+ JVM. Same CPU backend as the default JAR's Windows natives, but compiled with the Visual Studio generator instead of `Ninja Multi-Config`. Both use the same MSVC toolchain (static `/MT` CRT), so they are functionally equivalent — provided as an alternate-toolchain option. | +| `cuda13-windows-x86-64` | CUDA 13 | Windows x86-64 with NVIDIA GPU | NVIDIA driver + CUDA 13 Toolkit installed on the host (`cudart64_13.dll`, `cublas64_13.dll`, `cublasLt64_13.dll` resolvable on `PATH`). The runtime libraries are **not bundled** in the JAR; native-library load fails with `UnsatisfiedLinkError` if they are absent. No CPU fallback. | +| `vulkan-windows-x86-64` | Vulkan | Windows x86-64 with a Vulkan 1.2+ GPU (NVIDIA / AMD / Intel) | A Vulkan runtime (`vulkan-1.dll`), which current GPU drivers install. No Vulkan SDK is needed at runtime. The most portable Windows GPU option (vendor-independent). | +| `opencl-windows-x86-64` | OpenCL | Windows x86-64 with an OpenCL 2.0+ GPU | A vendor OpenCL ICD (`OpenCL.dll`, installed by the GPU driver). 
**Note:** the GGML OpenCL backend is Adreno-tuned; on desktop GPUs CUDA or Vulkan are better supported. | | `cuda13-linux-x86-64` | CUDA 13 | Linux x86-64 with NVIDIA GPU | NVIDIA driver + CUDA 13 runtime libraries (`libcudart.so.13`, `libcublas.so.13`) installed on the host. The shared library is dynamically linked against them and will fail to `dlopen` if they are absent — there is no automatic fallback to CPU. | | `opencl-android-aarch64` | OpenCL (Adreno) | Android aarch64 with Qualcomm Adreno GPU | A device-supplied OpenCL ICD (`libOpenCL.so`). Devices without an ICD (e.g. most non-Snapdragon Android hardware) must use the default CPU JAR. | -| `ninja-windows` | CPU (Ninja Multi-Config + MSVC) | Windows x86-64 and x86 | None beyond a JDK 8+ JVM. Same CPU backend as the default JAR's Windows natives, but compiled with the `Ninja Multi-Config` generator (sccache-cached in CI) instead of the Visual Studio generator. Provided so both Windows builds are available; functionally equivalent for normal use. | ```xml @@ -198,23 +203,52 @@ classifier — those are mutually exclusive — and optionally the Windows build opencl-android-aarch64 - + net.ladenthin llama 5.0.2 - ninja-windows + cuda13-windows-x86-64 + + + + + net.ladenthin + llama + 5.0.2 + vulkan-windows-x86-64 + + + + + net.ladenthin + llama + 5.0.2 + opencl-windows-x86-64 + + + + + net.ladenthin + llama + 5.0.2 + msvc-windows ``` > [!IMPORTANT] -> The CUDA JAR is **CUDA-only at runtime**. On a CPU-only host (no NVIDIA -> driver or no CUDA 13 runtime libraries installed) the JVM will fail at -> native-library load time with `UnsatisfiedLinkError` caused by an -> underlying `dlopen` failure on `libcudart.so.13`. If you want to ship a -> single artifact that works on both CPU and CUDA hosts, depend on the -> default (CPU) JAR; users who want GPU acceleration must compile locally -> with `-DGGML_CUDA=ON` (see [Setup required](#setup-required)). +> The GPU JARs are **GPU-only at runtime**. 
On a host without the matching +> GPU driver/runtime the JVM fails at native-library load time with +> `UnsatisfiedLinkError`: the CUDA JARs are dynamically linked against the +> CUDA runtime (`libcudart.so.13` on Linux, `cudart64_13.dll` / +> `cublas64_13.dll` / `cublasLt64_13.dll` on Windows — the Windows CUDA +> runtime is **not bundled**, install the CUDA 13 Toolkit), the Vulkan JAR +> needs a Vulkan runtime (`vulkan-1.dll`, shipped with current GPU drivers), +> and the OpenCL JARs need a vendor OpenCL ICD. There is no automatic +> fallback to CPU. If you want a single artifact that works on both CPU and +> GPU hosts, depend on the default (CPU) JAR; users who want GPU acceleration +> on an unlisted platform must compile locally with the matching `-DGGML_*=ON` +> flag (see [Setup required](#setup-required)). > [!NOTE] > Android `armeabi-v7a` (32-bit ARM) is **not** published. Only 64-bit diff --git a/TODO.md b/TODO.md index d28f1d1b..66de274f 100644 --- a/TODO.md +++ b/TODO.md @@ -175,46 +175,51 @@ primary goal: agentic tool-calling with Qwen): JDK-based `OpenAiCompatServer` (which is complete and the primary surface); value is shipping the full llama.cpp server + WebUI in-process without a separate `llama-server` binary. JNI + C++ work. -### Windows compiler cache (sccache) — dual build shipped (MSVC default + Ninja classifier) - -**Design decision (do not revisit without the owner): the MSVC / Visual Studio build is the -default JAR and is kept permanently — never retired.** The Ninja Multi-Config build is shipped -*alongside* it as the `ninja-windows` classifier JAR, never as a replacement. The loss of the -sccache cache on the MSVC build is accepted; the Ninja build exists so a cache-accelerated, -independently validated second Windows artifact is available for users to compare/adopt. - -**Why two builds.** The cache mechanism is the CMake *compiler launcher* -(`-DCMAKE_C_COMPILER_LAUNCHER=sccache`). 
**The Visual Studio generator ignores it entirely** -(only Ninja/Makefile generators honor it), so the MSVC jobs can never cache. The Ninja -Multi-Config generator *does* honor it (upstream llama.cpp `b9739` ships `windows-cuda` this way, -proving Ninja Multi-Config + MSVC works on the same tree). The two builds produce **different -`jllama.dll`s**, so they cannot coexist at the same resource path in one JAR — hence the classifier. - -**What shipped (this branch):** -- **4 Windows build jobs, all permanent:** `build-windows-x86_64`, `build-windows-x86` (MSVC, - default JAR) and `build-windows-x86_64-ninja`, `build-windows-x86-ninja` (Ninja + sccache/Depot). -- **Both tested end-to-end:** all four run the C++ unit tests (`ctest`); `test-java-windows-x86_64` - (MSVC) and the new `test-java-windows-x86_64-ninja` (Ninja) both load the DLL via JNI and run the - full model-backed Java suite. -- **`.github/build.bat`** — sccache probe guard (mirrors `build.sh`'s `sccache_can_wrap_compiler()`): - `USE_CACHE=true` + `sccache` on PATH + a trivial TU compiling through `sccache cl.exe` ⇒ - `-DCMAKE_{C,CXX}_COMPILER_LAUNCHER=sccache` + `sccache --show-stats`; else green uncached. Inert - for the MSVC jobs (they don't set `USE_CACHE`). -- **`pom.xml`** — `windows-ninja` profile → `ninja-windows` JAR from - `${project.build.outputDirectory}_windows_ninja` (mirrors the `cuda` / `opencl-android` profiles). -- **`publish.yml`** — the `package`, `publish-snapshot`, `publish-release` jobs download - `Windows-{x86_64,x86}-ninja` into `src/main/resources_windows_ninja/` and activate the - `windows-ninja` profile; the Ninja build + Java-test jobs are in the `package` `needs:` graph. -- Docs: `README.md` classifier table + `CLAUDE.md` "Windows Ninja artifact" section. 
- -**Verification — DONE (PR #248).** The Ninja jobs are green and cache-warm: `Build and Test -Windows … (Ninja … sccache, eval)` builds + `ctest` pass, and `Java Tests Windows 2025 x86_64 -(Ninja, eval)` loads the DLL via JNI and runs the full model-backed suite green (after the b9739 -arg-parse patch landed). `sccache --show-stats` confirms cache hits on the Ninja jobs. - -**Optional follow-up:** smoke-test that the *published* `ninja-windows` classifier JAR loads its DLL -on a clean Windows host. Publishing is gated behind `publish_to_central`, so a broken Windows job -blocks the release before any artifact reaches Central/GitHub Releases. +### Windows native classifiers — default flip (Ninja default + MSVC classifier) + CUDA/Vulkan/OpenCL GPU + +**Design decision UPDATED by the owner (supersedes the earlier "MSVC is the permanent default" +note): the default Windows CPU JAR is now the Ninja Multi-Config build, and the MSVC / Visual +Studio build ships as the `msvc-windows` classifier.** Rationale: both generators use the same MSVC +toolchain (`cl.exe`, static `/MT` CRT) on the same runner, so the produced DLLs are functionally +equivalent with identical runtime dependencies — the only difference is build-system plumbing + +sccache caching. Making Ninja the default gives the most-pulled JAR the cache; MSVC stays available +as a classifier. Three Windows GPU classifiers were added at the same time (x86_64 only, all Ninja): +`cuda13-windows-x86-64`, `vulkan-windows-x86-64`, `opencl-windows-x86-64`. + +**Why the cache needs Ninja.** The cache mechanism is the CMake *compiler launcher* +(`-DCMAKE_C_COMPILER_LAUNCHER=sccache`); the Visual Studio generator ignores it entirely, only +Ninja/Makefile generators honor it. Upstream llama.cpp also builds its Windows artifacts with Ninja +Multi-Config + MSVC. 
+ +**What shipped (this branch — first CI run done; see Verification below):** +- **CPU build jobs:** `build-windows-x86_64` / `build-windows-x86` are now **Ninja** (default, + artifacts `Windows-{arch}-libraries`); `build-windows-x86_64-msvc` / `build-windows-x86-msvc` are + **MSVC** (artifacts `Windows-{arch}-msvc`). `test-java-windows-x86_64` (default/Ninja) and + `test-java-windows-x86_64-msvc` both load the DLL via JNI and run the full model-backed suite. +- **GPU build jobs (x86_64, Ninja, build the artifact only — runners have no GPU, and a + GPU-linked jllama_test can't be enumerated there; C++ suite runs on the CPU jobs):** + `build-windows-x86_64-cuda` (`Jimver/cuda-toolkit@v0.2.35` CUDA `13.2.0` + `-DGGML_CUDA=ON`), + `build-windows-x86_64-vulkan` (`jakoch/install-vulkan-sdk-action` + `-DGGML_VULKAN=ON`), + `build-windows-x86_64-opencl` (`build_opencl_windows.bat` stages the ICD loader + `-DGGML_OPENCL=ON`). +- **`CMakeLists.txt`** — OS-aware backend routing (CUDA/OpenCL → Windows trees, new Vulkan branch). +- **`.github/build.bat`** — propagates `cmake --build` failures; nvcc is **not** sccache-wrapped (uncached on Windows). +- **`.github/build_opencl_windows.bat`** — new, Windows analogue of `build_opencl_android.sh`. +- **`pom.xml`** — profiles `windows-msvc` / `cuda-windows` / `vulkan-windows` / `opencl-windows` + (classifiers `msvc-windows` / `cuda13-windows-x86-64` / `vulkan-windows-x86-64` / `opencl-windows-x86-64`). +- **`publish.yml`** — the `package` / `publish-snapshot` / `publish-release` jobs download each + non-default artifact into `src/main/resources_windows_{msvc,cuda,vulkan,opencl}/` and activate the + four profiles; all five Windows build jobs are in the `package` `needs:` graph. +- Docs: `README.md` classifier table + `CLAUDE.md` "Windows native classifiers" section. + +**Verification — first CI run done (PR #276, run 28327740376).** Green on the first try: default Ninja +CPU flip (x64+x86), MSVC classifier (x64+x86), and the **OpenCL** GPU job (`build_opencl_windows.bat` +ICD staging works).
Two GPU jobs were fixed after the first run: **CUDA** (`Version not available: +13.0.0` → bumped `Jimver/cuda-toolkit` `v0.2.24`→`v0.2.35` + `13.2.0`) and **Vulkan** +(`find_package(Vulkan)` couldn't read the `humbletim` SDK layout → switched to +`jakoch/install-vulkan-sdk-action`). Re-run pending to confirm both fixes. + +**Optional follow-up:** smoke-test that each *published* classifier JAR loads its DLL on a clean +Windows host with the matching GPU driver/toolkit installed. **Reference notes:** - Cache backend is **sccache + Depot WebDAV** (consistent with the other 8 jobs — one token, shared diff --git a/pom.xml b/pom.xml index a04265f0..6c451877 100644 --- a/pom.xml +++ b/pom.xml @@ -913,24 +913,25 @@ SPDX-License-Identifier: MIT - - windows-ninja + + windows-msvc org.apache.maven.plugins maven-compiler-plugin - + - windows-ninja + windows-msvc compile compile @@ -946,7 +947,7 @@ SPDX-License-Identifier: MIT src/main/cpp - ${project.build.outputDirectory}_windows_ninja + ${project.build.outputDirectory}_windows_msvc @@ -956,18 +957,18 @@ SPDX-License-Identifier: MIT - copy-resources-windows-ninja + copy-resources-windows-msvc process-classes copy-resources - ${project.build.outputDirectory}_windows_ninja + ${project.build.outputDirectory}_windows_msvc - ${basedir}/src/main/resources_windows_ninja/ + ${basedir}/src/main/resources_windows_msvc/ **/*.* @@ -984,15 +985,258 @@ SPDX-License-Identifier: MIT - windows-ninja + windows-msvc package jar - ninja-windows + msvc-windows - ${project.build.outputDirectory}_windows_ninja + ${project.build.outputDirectory}_windows_msvc + + + + + + + + + + + cuda-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + cuda-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_cuda + + + + + + maven-resources-plugin + + + copy-resources-cuda-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_cuda + + 
+ + ${basedir}/src/main/resources_windows_cuda/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + cuda-windows + package + + jar + + + cuda13-windows-x86-64 + + ${project.build.outputDirectory}_windows_cuda + + + + + + + + + + + vulkan-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + vulkan-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_vulkan + + + + + + maven-resources-plugin + + + copy-resources-vulkan-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_vulkan + + + + ${basedir}/src/main/resources_windows_vulkan/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + vulkan-windows + package + + jar + + + vulkan-windows-x86-64 + + ${project.build.outputDirectory}_windows_vulkan + + + + + + + + + + + opencl-windows + + + + org.apache.maven.plugins + maven-compiler-plugin + + + opencl-windows + compile + + compile + + + + module-info.java + + + -h + src/main/cpp + + + ${project.build.outputDirectory}_windows_opencl + + + + + + maven-resources-plugin + + + copy-resources-opencl-windows + process-classes + + copy-resources + + + + ${project.build.outputDirectory}_windows_opencl + + + + ${basedir}/src/main/resources_windows_opencl/ + + **/*.* + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + opencl-windows + package + + jar + + + opencl-windows-x86-64 + + ${project.build.outputDirectory}_windows_opencl