The-OpenROAD-Project · ApeachM · May 21, 2026 · May 24, 2026 · May 24, 2026 · May 25, 2026
@@ -48,6 +48,13 @@ option(USE_SYSTEM_ABC "Use system shared ABC library" OFF)
 # Allow disabling tests
 option(ENABLE_TESTS "Enable OpenROAD tests" ON)
 
+# Opt-in GPU acceleration via Kokkos. The actual compute backend (CUDA, HIP,
+# SYCL, or host-only OpenMP/Threads) is determined by the installed Kokkos
+# package; OpenROAD inspects Kokkos_ENABLE_* and turns on the matching CMake
+# language and dependencies automatically. See the per-module CMakeLists for
+# how individual subsystems wire their GPU sources.
+option(ENABLE_GPU "Enable GPU acceleration via Kokkos" OFF)
+
 # Allow enabling address sanitizer
 option(ASAN "Enable Address Sanitizer" OFF)
 
@@ -92,6 +99,13 @@ if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE RELEASE)
 endif()
 
+# GPU backend wiring (opt-in). All Kokkos / CUDA / HIP / SYCL detection,
+# compiler probing, and language enablement live in cmake/KokkosBackend.cmake
+# and are loaded only when the user opts in via ENABLE_GPU=ON.
+if(ENABLE_GPU)
+  include(KokkosBackend)
+endif()
+
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.3.0")
     message(FATAL_ERROR "Insufficient gcc version. Found ${CMAKE_CXX_COMPILER_VERSION}, but require  >= 8.3.0.")

@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026, The OpenROAD Authors
+
+# Kokkos GPU backend wiring for OpenROAD. Included from the root
+# CMakeLists.txt only when ENABLE_GPU=ON; not loaded otherwise.
+#
+# Discovers the user's Kokkos install, inherits its compute backend, turns
+# on the matching CMake language so downstream targets can mark kernel
+# sources with set_source_files_properties(... LANGUAGE CUDA|HIP), and
+# applies the small set of nvcc / fmt / host-compiler workarounds that the
+# CUDA backend currently needs in modern Linux toolchains. Per-module
+# CMakeLists (e.g. src/gpl) key off ENABLE_GPU and Kokkos_ENABLE_*; they
+# do not need to call find_package(Kokkos) or enable_language() themselves.
+
+find_package(Kokkos QUIET)
+if(NOT Kokkos_FOUND)
+  message(FATAL_ERROR
+    "OpenROAD: ENABLE_GPU=ON requires the Kokkos package to be "
+    "installed and discoverable by CMake, but Kokkos was not found.\n"
+    "  - If Kokkos is already installed: pass "
+    "-DKokkos_ROOT=/path/to/kokkos (or extend CMAKE_PREFIX_PATH).\n"
+    "  - If not: build and install Kokkos from "
+    "https://github.com/kokkos/kokkos with the desired backend "
+    "(CUDA / HIP / SYCL / OpenMP) and a target architecture that "
+    "matches the host GPU.\n"
+    "  - A future etc/DependencyInstaller.sh -gpu option will "
+    "automate this step.")
+endif()
+
+# KokkosFFT — required by the gpl GPU FFT backend (src/gpl/src/gpu/dct.cpp).
+# A separate package from Kokkos core.
+find_package(KokkosFFT QUIET)
+if(NOT KokkosFFT_FOUND)
+  message(FATAL_ERROR
+    "ENABLE_GPU=ON requires KokkosFFT, which was not found.\n"
+    "  - Install KokkosFFT (https://github.com/kokkos/kokkos-fft) against\n"
+    "    your Kokkos build, then re-configure with -DKokkosFFT_ROOT=<prefix>.\n"
+    "  - A future etc/DependencyInstaller.sh -gpu will install Kokkos and\n"
+    "    KokkosFFT together.")
+endif()
+
+message(STATUS "OpenROAD: GPU acceleration enabled (Kokkos ${Kokkos_VERSION})")
+
+if(Kokkos_ENABLE_CUDA)
+  # Auto-discover nvcc when the user has CUDA installed at a standard
+  # location but their environment does not expose it on PATH (common
+  # with IDE-launched configures: the bundled CMake does not inherit
+  # the shell PATH). enable_language(CUDA) below would otherwise abort
+  # with "No CMAKE_CUDA_COMPILER could be found" even though Kokkos's
+  # find_package already located the toolkit.
+  if(NOT DEFINED CMAKE_CUDA_COMPILER AND NOT DEFINED ENV{CUDACXX})
+    find_program(_OPENROAD_NVCC nvcc
+      HINTS ENV CUDA_HOME ENV CUDA_PATH ENV CUDA_ROOT
+            /usr/local/cuda/bin
+            /usr/local/cuda-13.0/bin
+            /usr/local/cuda-12.8/bin /usr/local/cuda-12.0/bin
+            /opt/cuda/bin
+    )
+    if(_OPENROAD_NVCC)
+      set(CMAKE_CUDA_COMPILER "${_OPENROAD_NVCC}" CACHE FILEPATH "")
+      message(STATUS "OpenROAD: auto-discovered nvcc at ${_OPENROAD_NVCC}")
+    endif()
+  endif()
+  # nvcc < 13 cannot parse glibc 2.38+'s _Float128 type that ships with
+  # gcc 13+'s C++ standard library headers (math.h template specialization
+  # for __iseqsig_type<_Float128>). When a known-broken pairing is detected,
+  # pin a compatible older g++ as the CUDA host compiler (the system C++
+  # compiler stays unchanged for non-CUDA TUs). Override is always
+  # available via -DCMAKE_CUDA_HOST_COMPILER or CUDAHOSTCXX.
+  if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX}
+     AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
+     AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0"
+     AND _OPENROAD_NVCC)
+    execute_process(
+      COMMAND "${_OPENROAD_NVCC}" --version
+      OUTPUT_VARIABLE _OPENROAD_NVCC_VERSION_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(_OPENROAD_NVCC_VERSION_OUTPUT MATCHES "release ([0-9]+)")
+      set(_OPENROAD_NVCC_MAJOR "${CMAKE_MATCH_1}")
+      if(_OPENROAD_NVCC_MAJOR LESS 13)
+        foreach(_OPENROAD_GXX_VER 12 11)
+          find_program(_OPENROAD_CUDAHOST g++-${_OPENROAD_GXX_VER}
+            HINTS /usr/bin /usr/local/bin)
+          if(_OPENROAD_CUDAHOST)
+            set(CMAKE_CUDA_HOST_COMPILER "${_OPENROAD_CUDAHOST}"
+              CACHE FILEPATH "")
+            message(STATUS
+              "OpenROAD: pinning CUDA host compiler to "
+              "${_OPENROAD_CUDAHOST} (nvcc ${_OPENROAD_NVCC_MAJOR}.x + "
+              "glibc/gcc 13+ _Float128 compat)")
+            break()
+          endif()
+          unset(_OPENROAD_CUDAHOST CACHE)
+        endforeach()
+        if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER)
+          message(FATAL_ERROR
+            "OpenROAD: nvcc ${_OPENROAD_NVCC_MAJOR}.x cannot parse "
+            "_Float128 declarations in glibc 2.38+ system headers used "
+            "by gcc ${CMAKE_CXX_COMPILER_VERSION}, and no compatible "
+            "g++-12 / g++-11 was found in /usr/bin or /usr/local/bin. "
+            "Install one (e.g. apt install g++-12) or set "
+            "-DCMAKE_CUDA_HOST_COMPILER=/path/to/older-g++ explicitly.")
+        endif()
+      endif()
+    endif()
+  endif()
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
+    if(DEFINED Kokkos_CUDA_ARCHITECTURES
+       AND NOT "${Kokkos_CUDA_ARCHITECTURES}" STREQUAL "")
+      set(CMAKE_CUDA_ARCHITECTURES "${Kokkos_CUDA_ARCHITECTURES}")
+    else()
+      message(FATAL_ERROR
+        "OpenROAD: ENABLE_GPU=ON with Kokkos CUDA backend, but the "
+        "Kokkos package does not advertise Kokkos_CUDA_ARCHITECTURES "
+        "and CMAKE_CUDA_ARCHITECTURES was not provided. Set "
+        "-DCMAKE_CUDA_ARCHITECTURES=<arch> explicitly (e.g. 89 for "
+        "RTX 4070, 120 for RTX 5090) or rebuild Kokkos with the "
+        "target architecture baked in.")
+    endif()
+  endif()
+  enable_language(CUDA)
+  find_package(CUDAToolkit REQUIRED)
+  message(STATUS "OpenROAD: CUDA backend (arch=${CMAKE_CUDA_ARCHITECTURES})")
+  # A GPU driver (the kernel module exposing libcuda.so.1) is needed only to
+  # *run* CUDA code, never to build it -- nvcc cross-compiles device code on a
+  # host with no GPU. Note its absence so the resulting libcuda.so.1 load
+  # errors on this host (e.g. ctest, or running openroad) read as expected
+  # rather than as a misconfiguration. This is informational only: a GPU build
+  # on a driverless host is a supported cross-compile workflow, not an error.
+  if(NOT EXISTS "/proc/driver/nvidia")
+    message(STATUS
+      "OpenROAD: no NVIDIA driver on this host -- GPU code is being "
+      "cross-compiled. Run the GPU binaries and tests on a GPU machine.")
+  endif()
+  # nvcc 12.8 cannot parse fmt 11's nontype-template-parameter user-defined
+  # literals (fmt/bundled/format.h: operator""_a with fixed_string). The
+  # legacy literal fallback is still available; opt into it for CUDA TUs
+  # only. Project-wide CXX compilation is unaffected.
+  add_compile_definitions(
+    $<$<COMPILE_LANGUAGE:CUDA>:FMT_USE_NONTYPE_TEMPLATE_ARGS=0>)
+  # On aarch64, Boost's unordered_flat_map detects __ARM_NEON and includes
+  # <arm_neon.h> for SIMD-accelerated hashing.  nvcc cannot parse gcc's
+  # arm_neon.h (it contains gcc-specific intrinsics), so disable the NEON
+  # path for CUDA TUs.  The CPU TUs (compiled by g++) are unaffected.
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64")
+    add_compile_definitions(
+      $<$<COMPILE_LANGUAGE:CUDA>:BOOST_UNORDERED_DISABLE_NEON>)
+  endif()
+elseif(Kokkos_ENABLE_HIP)
+  enable_language(HIP)
+  message(STATUS "OpenROAD: HIP backend")
+elseif(Kokkos_ENABLE_SYCL)
+  message(STATUS "OpenROAD: SYCL backend (driven by Kokkos host compiler)")
+else()
+  message(STATUS
+          "OpenROAD: host-only Kokkos backend (Serial / OpenMP / Threads)")
+endif()
@@ -38,11 +38,19 @@ cc_library(
     name = "gpl",
     srcs = [
         "src/AbstractGraphics.cpp",
+        "src/backendContext.h",
+        "src/densityGradient.cpp",
+        "src/densityGradientBackend.h",
         "src/fft.cpp",
         "src/fft.h",
+        "src/fftBackend.h",
         "src/fftsg.cpp",
         "src/fftsg2d.cpp",
+        "src/gpu/deviceState.h",
+        "src/gpu/nesterovDeviceContext.h",
         "src/graphicsNone.cpp",
+        "src/hpwl.cpp",
+        "src/hpwlBackend.h",
         "src/initialPlace.cpp",
         "src/initialPlace.h",
         "src/mbff.cpp",
@@ -55,6 +63,8 @@ cc_library(
         "src/solver.h",
         "src/timingBase.cpp",
         "src/timingBase.h",
+        "src/wirelengthGradient.cpp",
+        "src/wirelengthGradientBackend.h",
     ],
     hdrs = [
         "include/gpl/Replace.h",

@@ -34,13 +34,91 @@ add_library(gpl_lib
   src/fft.cpp
   src/fftsg.cpp
   src/fftsg2d.cpp
+  src/hpwl.cpp
+  src/wirelengthGradient.cpp
+  src/densityGradient.cpp
   src/routeBase.cpp
   src/timingBase.cpp
   src/graphicsNone.cpp
   src/solver.cpp
   src/mbff.cpp
 )
 
+# --- HPWL & FFT backends: runtime switch (Strategy + Factory) ---
+# The CPU backends (CpuHpwlBackend in src/hpwl.cpp, CpuFftBackend in
+# src/fft.cpp, + the Ooura src/fftsg*.cpp) are always compiled. When
+# ENABLE_GPU=ON the Kokkos GPU backends in src/gpu/ are also compiled in;
+# makeHpwlBackend() / makeFftBackend() pick the backend per process at run
+# time (gpl::gpuEnabled(), driven by the ENABLE_GPU env var). ENABLE_GPU is a
+# compile definition gating the #ifdef in those two factories; the consumer
+# headers (nesterovBase.h, fft.h) stay preprocessor-free. gpu/ is a
+# file-layout subdirectory only (no nested CMakeLists.txt) so kernel build
+# settings stay in this module's CMakeLists with the rest of gpl_lib.
+if(ENABLE_GPU)
+  target_sources(gpl_lib PRIVATE
+    src/gpu/gpuHpwlBackend.cpp
+    src/gpu/gpuRuntime.cpp
+    src/gpu/gpuFftBackend.cpp
+    src/gpu/poissonSolver.cpp
+    src/gpu/dct.cpp
+    src/gpu/deviceState.cpp
+    src/gpu/gpuWirelengthGradientBackend.cpp
+    src/gpu/wirelengthOp.cpp
+    src/gpu/gpuDensityGradientBackend.cpp
+    src/gpu/densityOp.cpp
+    src/gpu/nesterovOp.cpp
+    src/gpu/nesterovDeviceContext.cpp)
+  target_compile_definitions(gpl_lib PRIVATE ENABLE_GPU)
+  # nesterovBase.h and other private gpl headers live in src/; sources
+  # under src/gpu/ need that on the include path explicitly because
+  # the compiler's default same-dir lookup points into src/gpu/ instead.
+  target_include_directories(gpl_lib PRIVATE src)
+  # The src/gpu/ TUs are device kernels. gpu/gpuRuntime.cpp carries no device
+  # code itself, but it includes <Kokkos_Core.hpp> for the lazy Kokkos
+  # initialize()/finalize(): when Kokkos is built with the CUDA (or HIP)
+  # backend, that header bakes KOKKOS_ENABLE_CUDA into its config and refuses
+  # to compile under a plain host compiler (it requires __CUDACC__). The same
+  # applies to src/fft.cpp, whose makeFftBackend() factory includes
+  # gpu/gpuFftBackend.h (Kokkos-dependent) to construct a GpuFftBackend. All
+  # such TUs are flagged with the device language to match the Kokkos backend.
+  # src/hpwl.cpp stays a plain CXX TU — gpu/gpuHpwlBackend.h is Kokkos-free, so
+  # its makeHpwlBackend() factory needs no device language.
+  # src/fftsg.cpp / src/fftsg2d.cpp are pure C++ Ooura code — left as CXX.
+  if(Kokkos_ENABLE_CUDA)
+    set_source_files_properties(
+      src/gpu/gpuHpwlBackend.cpp src/gpu/gpuRuntime.cpp src/gpu/gpuFftBackend.cpp
+      src/gpu/poissonSolver.cpp src/gpu/dct.cpp src/gpu/deviceState.cpp
+      src/gpu/gpuWirelengthGradientBackend.cpp src/gpu/wirelengthOp.cpp
+      src/gpu/gpuDensityGradientBackend.cpp src/gpu/densityOp.cpp
+      src/gpu/nesterovOp.cpp src/gpu/nesterovDeviceContext.cpp
+      src/fft.cpp
+      PROPERTIES LANGUAGE CUDA)
+  elseif(Kokkos_ENABLE_HIP)
+    set_source_files_properties(
+      src/gpu/gpuHpwlBackend.cpp src/gpu/gpuRuntime.cpp src/gpu/gpuFftBackend.cpp
+      src/gpu/poissonSolver.cpp src/gpu/dct.cpp src/gpu/deviceState.cpp
+      src/gpu/gpuWirelengthGradientBackend.cpp src/gpu/wirelengthOp.cpp
+      src/gpu/gpuDensityGradientBackend.cpp src/gpu/densityOp.cpp
+      src/gpu/nesterovOp.cpp src/gpu/nesterovDeviceContext.cpp
+      src/fft.cpp
+      PROPERTIES LANGUAGE HIP)
+  endif()
+  # Disable FP contraction for kernels that share gpl_lib's compile
+  # context so they stay bit-stable across compilers. Scoped to gpl_lib
+  # but the CXX flag is also harmless on the existing CPU TUs.
+  target_compile_options(gpl_lib PRIVATE
+    $<$<COMPILE_LANGUAGE:CXX>:-ffp-contract=off>
+    $<$<COMPILE_LANGUAGE:CUDA>:--fmad=false>
+    $<$<COMPILE_LANGUAGE:HIP>:-ffp-contract=off>
+  )
+  target_link_libraries(gpl_lib Kokkos::kokkos KokkosFFT::fft)
+  if(Kokkos_ENABLE_CUDA)
+    # cuda runtime symbols are referenced from the CUDA TU; expose cudart
+    # so that gpl_lib (and the openroad binary) link against libcudart.
+    target_link_libraries(gpl_lib CUDA::cudart)
+  endif()
+endif()
+
 target_sources(gpl
   PRIVATE
     src/MakeReplace.cpp
@@ -59,6 +137,13 @@ target_include_directories(gpl_lib
   PUBLIC
     include
     ${LEMON_INCLUDE_DIRS}
+  PRIVATE
+    # The PIMPL headers under src/gpu/ (deviceState.h, nesterovDeviceContext.h)
+    # are included from src/nesterovBase.cpp on both ENABLE_GPU=ON and OFF
+    # paths, and they need to find sibling headers like src/point.h. Add the
+    # src/ directory to the private include path unconditionally; previously
+    # it was only added inside the if(ENABLE_GPU) block.
+    src
 )
 
 target_link_libraries(gpl_lib

@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: BSD-3-Clause
+// Copyright (c) 2026, The OpenROAD Authors
+
+// BackendContext — a single bundle of construction parameters passed to each
+// of the gpl Strategy backend factories (makeHpwlBackend,
+// makeWirelengthGradientBackend, makeDensityGradientBackend, makeFftBackend).
+//
+// Each factory consumes the subset of fields it needs and ignores the rest;
+// callers build one context per construction site and reuse it across the
+// four factory calls. Plain C++ — Kokkos types are forward-declared elsewhere
+// and pointers (DeviceState*, NesterovBase*, NesterovBaseCommon*) are only
+// dereferenced inside backend translation units.
+
+#pragma once
+
+namespace gpl {
+
+class DeviceState;
+class NesterovBase;
+class NesterovBaseCommon;
+
+struct BackendContext
+{
+  // Owning / context pointers. nbc is required by the wirelength gradient
+  // backend; nb is required by the density gradient backend; device_state is
+  // borrowed by every GPU backend and ignored by the CPU backends.
+  NesterovBaseCommon* nbc = nullptr;
+  NesterovBase* nb = nullptr;
+  DeviceState* device_state = nullptr;
+
+  // OpenMP fan-out for the CPU backends.
+  int num_threads = 1;
+
+  // FFT-only grid geometry. Required by makeFftBackend; ignored elsewhere.
+  int bin_cnt_x = 0;
+  int bin_cnt_y = 0;
+  float bin_size_x = 0;
+  float bin_size_y = 0;
+};
+
+}  // namespace gpl