Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ option(USE_SYSTEM_ABC "Use system shared ABC library" OFF)
# Allow disabling tests
option(ENABLE_TESTS "Enable OpenROAD tests" ON)

# Opt-in GPU acceleration via Kokkos. The actual compute backend (CUDA, HIP,
# SYCL, or host-only OpenMP/Threads) is determined by the installed Kokkos
# package; OpenROAD inspects Kokkos_ENABLE_* and turns on the matching CMake
# language and dependencies automatically. See the per-module CMakeLists for
# how individual subsystems wire their GPU sources.
option(ENABLE_GPU "Enable GPU acceleration via Kokkos" OFF)

# Allow enabling address sanitizer
option(ASAN "Enable Address Sanitizer" OFF)

Expand Down Expand Up @@ -92,6 +99,13 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE RELEASE)
endif()

# GPU backend wiring (opt-in). All Kokkos / CUDA / HIP / SYCL detection,
# compiler probing, and language enablement live in cmake/KokkosBackend.cmake
# and are loaded only when the user opts in via ENABLE_GPU=ON.
if(ENABLE_GPU)
include(KokkosBackend)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.3.0")
message(FATAL_ERROR "Insufficient gcc version. Found ${CMAKE_CXX_COMPILER_VERSION}, but require >= 8.3.0.")
Expand Down
158 changes: 158 additions & 0 deletions cmake/KokkosBackend.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2026, The OpenROAD Authors

# Kokkos GPU backend wiring for OpenROAD. Included from the root
# CMakeLists.txt only when ENABLE_GPU=ON; not loaded otherwise.
#
# Discovers the user's Kokkos install, inherits its compute backend, turns
# on the matching CMake language so downstream targets can mark kernel
# sources with set_source_files_properties(... LANGUAGE CUDA|HIP), and
# applies the small set of nvcc / fmt / host-compiler workarounds that the
# CUDA backend currently needs in modern Linux toolchains. Per-module
# CMakeLists (e.g. src/gpl) key off ENABLE_GPU and Kokkos_ENABLE_*; they
# do not need to call find_package(Kokkos) or enable_language() themselves.

find_package(Kokkos QUIET)
if(NOT Kokkos_FOUND)
message(FATAL_ERROR
"OpenROAD: ENABLE_GPU=ON requires the Kokkos package to be "
"installed and discoverable by CMake, but Kokkos was not found.\n"
" - If Kokkos is already installed: pass "
"-DKokkos_ROOT=/path/to/kokkos (or extend CMAKE_PREFIX_PATH).\n"
" - If not: build and install Kokkos from "
"https://github.com/kokkos/kokkos with the desired backend "
"(CUDA / HIP / SYCL / OpenMP) and a target architecture that "
"matches the host GPU.\n"
" - A future etc/DependencyInstaller.sh -gpu option will "
"automate this step.")
endif()

# KokkosFFT — required by the gpl GPU FFT backend (src/gpl/src/gpu/dct.cpp).
# A separate package from Kokkos core.
find_package(KokkosFFT QUIET)
if(NOT KokkosFFT_FOUND)
message(FATAL_ERROR
"ENABLE_GPU=ON requires KokkosFFT, which was not found.\n"
" - Install KokkosFFT (https://github.com/kokkos/kokkos-fft) against\n"
" your Kokkos build, then re-configure with -DKokkosFFT_ROOT=<prefix>.\n"
" - A future etc/DependencyInstaller.sh -gpu will install Kokkos and\n"
" KokkosFFT together.")
endif()

message(STATUS "OpenROAD: GPU acceleration enabled (Kokkos ${Kokkos_VERSION})")

if(Kokkos_ENABLE_CUDA)
# Auto-discover nvcc when the user has CUDA installed at a standard
# location but their environment does not expose it on PATH (common
# with IDE-launched configures: the bundled CMake does not inherit
# the shell PATH). enable_language(CUDA) below would otherwise abort
# with "No CMAKE_CUDA_COMPILER could be found" even though Kokkos's
# find_package already located the toolkit.
if(NOT DEFINED CMAKE_CUDA_COMPILER AND NOT DEFINED ENV{CUDACXX})
find_program(_OPENROAD_NVCC nvcc
HINTS ENV CUDA_HOME ENV CUDA_PATH ENV CUDA_ROOT
/usr/local/cuda/bin
/usr/local/cuda-13.0/bin
/usr/local/cuda-12.8/bin /usr/local/cuda-12.0/bin
/opt/cuda/bin
)
if(_OPENROAD_NVCC)
set(CMAKE_CUDA_COMPILER "${_OPENROAD_NVCC}" CACHE FILEPATH "")
message(STATUS "OpenROAD: auto-discovered nvcc at ${_OPENROAD_NVCC}")
endif()
endif()
# nvcc < 13 cannot parse glibc 2.38+'s _Float128 type that ships with
# gcc 13+'s C++ standard library headers (math.h template specialization
# for __iseqsig_type<_Float128>). When a known-broken pairing is detected,
# pin a compatible older g++ as the CUDA host compiler (the system C++
# compiler stays unchanged for non-CUDA TUs). Override is always
# available via -DCMAKE_CUDA_HOST_COMPILER or CUDAHOSTCXX.
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX}
AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0"
AND _OPENROAD_NVCC)
execute_process(
COMMAND "${_OPENROAD_NVCC}" --version
OUTPUT_VARIABLE _OPENROAD_NVCC_VERSION_OUTPUT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(_OPENROAD_NVCC_VERSION_OUTPUT MATCHES "release ([0-9]+)")
set(_OPENROAD_NVCC_MAJOR "${CMAKE_MATCH_1}")
if(_OPENROAD_NVCC_MAJOR LESS 13)
foreach(_OPENROAD_GXX_VER 12 11)
find_program(_OPENROAD_CUDAHOST g++-${_OPENROAD_GXX_VER}
HINTS /usr/bin /usr/local/bin)
if(_OPENROAD_CUDAHOST)
set(CMAKE_CUDA_HOST_COMPILER "${_OPENROAD_CUDAHOST}"
CACHE FILEPATH "")
message(STATUS
"OpenROAD: pinning CUDA host compiler to "
"${_OPENROAD_CUDAHOST} (nvcc ${_OPENROAD_NVCC_MAJOR}.x + "
"glibc/gcc 13+ _Float128 compat)")
break()
endif()
unset(_OPENROAD_CUDAHOST CACHE)
endforeach()
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER)
message(FATAL_ERROR
"OpenROAD: nvcc ${_OPENROAD_NVCC_MAJOR}.x cannot parse "
"_Float128 declarations in glibc 2.38+ system headers used "
"by gcc ${CMAKE_CXX_COMPILER_VERSION}, and no compatible "
"g++-12 / g++-11 was found in /usr/bin or /usr/local/bin. "
"Install one (e.g. apt install g++-12) or set "
"-DCMAKE_CUDA_HOST_COMPILER=/path/to/older-g++ explicitly.")
endif()
endif()
endif()
endif()
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
if(DEFINED Kokkos_CUDA_ARCHITECTURES
AND NOT "${Kokkos_CUDA_ARCHITECTURES}" STREQUAL "")
set(CMAKE_CUDA_ARCHITECTURES "${Kokkos_CUDA_ARCHITECTURES}")
else()
message(FATAL_ERROR
"OpenROAD: ENABLE_GPU=ON with Kokkos CUDA backend, but the "
"Kokkos package does not advertise Kokkos_CUDA_ARCHITECTURES "
"and CMAKE_CUDA_ARCHITECTURES was not provided. Set "
"-DCMAKE_CUDA_ARCHITECTURES=<arch> explicitly (e.g. 89 for "
"RTX 4070, 120 for RTX 5090) or rebuild Kokkos with the "
"target architecture baked in.")
endif()
endif()
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
message(STATUS "OpenROAD: CUDA backend (arch=${CMAKE_CUDA_ARCHITECTURES})")
# A GPU driver (the kernel module exposing libcuda.so.1) is needed only to
# *run* CUDA code, never to build it -- nvcc cross-compiles device code on a
# host with no GPU. Note its absence so the resulting libcuda.so.1 load
# errors on this host (e.g. ctest, or running openroad) read as expected
# rather than as a misconfiguration. This is informational only: a GPU build
# on a driverless host is a supported cross-compile workflow, not an error.
if(NOT EXISTS "/proc/driver/nvidia")
message(STATUS
"OpenROAD: no NVIDIA driver on this host -- GPU code is being "
"cross-compiled. Run the GPU binaries and tests on a GPU machine.")
endif()
# nvcc 12.8 cannot parse fmt 11's nontype-template-parameter user-defined
# literals (fmt/bundled/format.h: operator""_a with fixed_string). The
# legacy literal fallback is still available; opt into it for CUDA TUs
# only. Project-wide CXX compilation is unaffected.
add_compile_definitions(
$<$<COMPILE_LANGUAGE:CUDA>:FMT_USE_NONTYPE_TEMPLATE_ARGS=0>)
# On aarch64, Boost's unordered_flat_map detects __ARM_NEON and includes
# <arm_neon.h> for SIMD-accelerated hashing. nvcc cannot parse gcc's
# arm_neon.h (it contains gcc-specific intrinsics), so disable the NEON
# path for CUDA TUs. The CPU TUs (compiled by g++) are unaffected.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64")
add_compile_definitions(
$<$<COMPILE_LANGUAGE:CUDA>:BOOST_UNORDERED_DISABLE_NEON>)
endif()
elseif(Kokkos_ENABLE_HIP)
enable_language(HIP)
message(STATUS "OpenROAD: HIP backend")
elseif(Kokkos_ENABLE_SYCL)
message(STATUS "OpenROAD: SYCL backend (driven by Kokkos host compiler)")
else()
message(STATUS
"OpenROAD: host-only Kokkos backend (Serial / OpenMP / Threads)")
endif()
10 changes: 10 additions & 0 deletions src/gpl/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,19 @@ cc_library(
name = "gpl",
srcs = [
"src/AbstractGraphics.cpp",
"src/backendContext.h",
"src/densityGradient.cpp",
"src/densityGradientBackend.h",
"src/fft.cpp",
"src/fft.h",
"src/fftBackend.h",
"src/fftsg.cpp",
"src/fftsg2d.cpp",
"src/gpu/deviceState.h",
"src/gpu/nesterovDeviceContext.h",
"src/graphicsNone.cpp",
"src/hpwl.cpp",
"src/hpwlBackend.h",
"src/initialPlace.cpp",
"src/initialPlace.h",
"src/mbff.cpp",
Expand All @@ -55,6 +63,8 @@ cc_library(
"src/solver.h",
"src/timingBase.cpp",
"src/timingBase.h",
"src/wirelengthGradient.cpp",
"src/wirelengthGradientBackend.h",
],
hdrs = [
"include/gpl/Replace.h",
Expand Down
85 changes: 85 additions & 0 deletions src/gpl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,91 @@ add_library(gpl_lib
src/fft.cpp
src/fftsg.cpp
src/fftsg2d.cpp
src/hpwl.cpp
src/wirelengthGradient.cpp
src/densityGradient.cpp
src/routeBase.cpp
src/timingBase.cpp
src/graphicsNone.cpp
src/solver.cpp
src/mbff.cpp
)

# --- HPWL & FFT backends: runtime switch (Strategy + Factory) ---
# The CPU backends (CpuHpwlBackend in src/hpwl.cpp, CpuFftBackend in
# src/fft.cpp, + the Ooura src/fftsg*.cpp) are always compiled. When
# ENABLE_GPU=ON the Kokkos GPU backends in src/gpu/ are also compiled in;
# makeHpwlBackend() / makeFftBackend() pick the backend per process at run
# time (gpl::gpuEnabled(), driven by the ENABLE_GPU env var). ENABLE_GPU is a
# compile definition gating the #ifdef in those two factories; the consumer
# headers (nesterovBase.h, fft.h) stay preprocessor-free. gpu/ is a
# file-layout subdirectory only (no nested CMakeLists.txt) so kernel build
# settings stay in this module's CMakeLists with the rest of gpl_lib.
if(ENABLE_GPU)
target_sources(gpl_lib PRIVATE
src/gpu/gpuHpwlBackend.cpp
src/gpu/gpuRuntime.cpp
src/gpu/gpuFftBackend.cpp
src/gpu/poissonSolver.cpp
src/gpu/dct.cpp
src/gpu/deviceState.cpp
src/gpu/gpuWirelengthGradientBackend.cpp
src/gpu/wirelengthOp.cpp
src/gpu/gpuDensityGradientBackend.cpp
src/gpu/densityOp.cpp
src/gpu/nesterovOp.cpp
src/gpu/nesterovDeviceContext.cpp)
target_compile_definitions(gpl_lib PRIVATE ENABLE_GPU)
# nesterovBase.h and other private gpl headers live in src/; sources
# under src/gpu/ need that on the include path explicitly because
# the compiler's default same-dir lookup points into src/gpu/ instead.
target_include_directories(gpl_lib PRIVATE src)
# The src/gpu/ TUs are device kernels. gpu/gpuRuntime.cpp carries no device
# code itself, but it includes <Kokkos_Core.hpp> for the lazy Kokkos
# initialize()/finalize(): when Kokkos is built with the CUDA (or HIP)
# backend, that header bakes KOKKOS_ENABLE_CUDA into its config and refuses
# to compile under a plain host compiler (it requires __CUDACC__). The same
# applies to src/fft.cpp, whose makeFftBackend() factory includes
# gpu/gpuFftBackend.h (Kokkos-dependent) to construct a GpuFftBackend. All
# such TUs are flagged with the device language to match the Kokkos backend.
# src/hpwl.cpp stays a plain CXX TU — gpu/gpuHpwlBackend.h is Kokkos-free, so
# its makeHpwlBackend() factory needs no device language.
# src/fftsg.cpp / src/fftsg2d.cpp are pure C++ Ooura code — left as CXX.
if(Kokkos_ENABLE_CUDA)
set_source_files_properties(
src/gpu/gpuHpwlBackend.cpp src/gpu/gpuRuntime.cpp src/gpu/gpuFftBackend.cpp
src/gpu/poissonSolver.cpp src/gpu/dct.cpp src/gpu/deviceState.cpp
src/gpu/gpuWirelengthGradientBackend.cpp src/gpu/wirelengthOp.cpp
src/gpu/gpuDensityGradientBackend.cpp src/gpu/densityOp.cpp
src/gpu/nesterovOp.cpp src/gpu/nesterovDeviceContext.cpp
src/fft.cpp
PROPERTIES LANGUAGE CUDA)
elseif(Kokkos_ENABLE_HIP)
set_source_files_properties(
src/gpu/gpuHpwlBackend.cpp src/gpu/gpuRuntime.cpp src/gpu/gpuFftBackend.cpp
src/gpu/poissonSolver.cpp src/gpu/dct.cpp src/gpu/deviceState.cpp
src/gpu/gpuWirelengthGradientBackend.cpp src/gpu/wirelengthOp.cpp
src/gpu/gpuDensityGradientBackend.cpp src/gpu/densityOp.cpp
src/gpu/nesterovOp.cpp src/gpu/nesterovDeviceContext.cpp
src/fft.cpp
PROPERTIES LANGUAGE HIP)
endif()
# Disable FP contraction for kernels that share gpl_lib's compile
# context so they stay bit-stable across compilers. Scoped to gpl_lib
# but the CXX flag is also harmless on the existing CPU TUs.
target_compile_options(gpl_lib PRIVATE
$<$<COMPILE_LANGUAGE:CXX>:-ffp-contract=off>
$<$<COMPILE_LANGUAGE:CUDA>:--fmad=false>
$<$<COMPILE_LANGUAGE:HIP>:-ffp-contract=off>
)
target_link_libraries(gpl_lib Kokkos::kokkos KokkosFFT::fft)
if(Kokkos_ENABLE_CUDA)
# cuda runtime symbols are referenced from the CUDA TU; expose cudart
# so that gpl_lib (and the openroad binary) link against libcudart.
target_link_libraries(gpl_lib CUDA::cudart)
endif()
endif()

target_sources(gpl
PRIVATE
src/MakeReplace.cpp
Expand All @@ -59,6 +137,13 @@ target_include_directories(gpl_lib
PUBLIC
include
${LEMON_INCLUDE_DIRS}
PRIVATE
# The PIMPL headers under src/gpu/ (deviceState.h, nesterovDeviceContext.h)
# are included from src/nesterovBase.cpp on both ENABLE_GPU=ON and OFF
# paths, and they need to find sibling headers like src/point.h. Add the
# src/ directory to the private include path unconditionally; previously
# it was only added inside the if(ENABLE_GPU) block.
src
)

target_link_libraries(gpl_lib
Expand Down
41 changes: 41 additions & 0 deletions src/gpl/src/backendContext.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) 2026, The OpenROAD Authors

// BackendContext — a single bundle of construction parameters passed to each
// of the gpl Strategy backend factories (makeHpwlBackend,
// makeWirelengthGradientBackend, makeDensityGradientBackend, makeFftBackend).
//
// Each factory consumes the subset of fields it needs and ignores the rest;
// callers build one context per construction site and reuse it across the
// four factory calls. Plain C++ — Kokkos types are forward-declared elsewhere
// and pointers (DeviceState*, NesterovBase*, NesterovBaseCommon*) are only
// dereferenced inside backend translation units.

#pragma once

namespace gpl {

class DeviceState;
class NesterovBase;
class NesterovBaseCommon;

struct BackendContext
{
// Owning / context pointers. nbc is required by the wirelength gradient
// backend; nb is required by the density gradient backend; device_state is
// borrowed by every GPU backend and ignored by the CPU backends.
NesterovBaseCommon* nbc = nullptr;
NesterovBase* nb = nullptr;
DeviceState* device_state = nullptr;

// OpenMP fan-out for the CPU backends.
int num_threads = 1;

// FFT-only grid geometry. Required by makeFftBackend; ignored elsewhere.
int bin_cnt_x = 0;
int bin_cnt_y = 0;
float bin_size_x = 0;
float bin_size_y = 0;
};

} // namespace gpl
Loading
Loading