diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index b38e86a0ae69..26a3b9a2c413 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -118,10 +118,11 @@ def wrapper(exctype, value, trbk): sys.excepthook = tvm_wrap_excepthook(sys.excepthook) -# Autoload out-of-tree backends registered under the ``tvm.backends`` entry -# point group. Runs last, after the core runtime and the tvm namespace are -# fully initialized, so an extension can safely register into ``tvm.*`` and -# load extra libraries. Imported lazily here to avoid any import-cycle risk. -from ._autoload_backends import _autoload_backends +# Autoload backend runtime libraries and out-of-tree backends registered under +# the ``tvm.backends`` entry point group. Runs last, after the core runtime and +# the tvm namespace are fully initialized, so an extension can safely register +# into ``tvm.*`` and load extra libraries. +from .backend._autoload_backends import _autoload_backends +backend.autoload_backend_libs() _autoload_backends() diff --git a/python/tvm/_autoload_backends.py b/python/tvm/_autoload_backends.py index b45ac59d9da2..cf64719f1343 100644 --- a/python/tvm/_autoload_backends.py +++ b/python/tvm/_autoload_backends.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Autoload out-of-tree backends registered via ``tvm.backends`` entry points. +"""Compatibility route for backend autoload infrastructure. Out-of-tree extensions opt into being loaded automatically at ``import tvm`` time by declaring an entry point in the ``tvm.backends`` group:: @@ -25,26 +25,6 @@ Autoload can be disabled via ``TVM_DEVICE_BACKEND_AUTOLOAD=0``. """ -import os -import warnings -from importlib.metadata import entry_points +from .backend._autoload_backends import _autoload_backends -# Guard so autoload runs at most once per process, even if invoked again. -_AUTO_LOAD_DONE = False - - -def _autoload_backends(): - """Discover and invoke out-of-tree backends registered via entry points.""" - global _AUTO_LOAD_DONE - if _AUTO_LOAD_DONE: - return - _AUTO_LOAD_DONE = True - - if os.environ.get("TVM_DEVICE_BACKEND_AUTOLOAD", "1") == "0": - return - - for entry_pt in entry_points(group="tvm.backends"): - try: - entry_pt.load()() - except Exception as e: # pylint: disable=broad-except - warnings.warn(f"Failed to autoload tvm backend '{entry_pt.name}': {e}") +__all__ = ["_autoload_backends"] diff --git a/python/tvm/backend/__init__.py b/python/tvm/backend/__init__.py index 2243f5e03410..fe48b4fe6831 100644 --- a/python/tvm/backend/__init__.py +++ b/python/tvm/backend/__init__.py @@ -25,19 +25,10 @@ from pkgutil import extend_path from typing import Any +from ._autoload_backends import autoload_backend_libs, load_all + __path__ = extend_path(__path__, __name__) # type: ignore[name-defined] -_BUILTIN_BACKENDS = ( - "cuda", - "metal", - "rocm", - "trn", - "opencl", - "vulkan", - "webgpu", - "hexagon", - "adreno", -) _LOADED_BACKENDS: dict[str, Any] = {} @@ -192,18 +183,10 @@ def load(name: str) -> None: return None -def load_all() -> None: - """Load all in-tree backend Python hooks.""" - - for name in _BUILTIN_BACKENDS: - load(name) - return None - - def is_loaded(name: str) -> bool: """Return whether a backend has been loaded.""" return name in _LOADED_BACKENDS -__all__ = ["is_loaded", "load", "load_all"] +__all__ = ["autoload_backend_libs", "is_loaded", "load", "load_all"] diff --git a/python/tvm/backend/_autoload_backends.py b/python/tvm/backend/_autoload_backends.py new file mode 100644 index 000000000000..a7c63c0c4866 --- /dev/null +++ b/python/tvm/backend/_autoload_backends.py @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Autoload backend libraries and Python backend registration hooks.""" + +from __future__ import annotations + +import os +import warnings +from importlib import import_module +from importlib.metadata import entry_points +from pathlib import Path +from typing import Any + +from tvm_ffi.libinfo import load_lib_ctypes + +_BUILTIN_BACKENDS = ( + "cuda", + "metal", + "rocm", + "trn", + "opencl", + "vulkan", + "webgpu", + "hexagon", + "adreno", +) +_LEGACY_RUNTIME_LIBS_WITHOUT_BACKEND_PACKAGE = ("extra",) + +# Guard so autoload runs at most once per process, even if invoked again. +_BACKEND_LIBS_LOADED = False +_AUTO_LOAD_DONE = False + + +def autoload_backend_libs(loaded_libs: dict[str, Any] | None = None) -> None: + """Load each known backend runtime DSO into the process-global symbol namespace.""" + global _BACKEND_LIBS_LOADED + if _BACKEND_LIBS_LOADED: + return + + if loaded_libs is None: + from tvm.base import _LOADED_LIBS # pylint: disable=import-outside-toplevel + + loaded_libs = _LOADED_LIBS + + runtime_lib = loaded_libs.get("tvm_runtime") + if runtime_lib is None: + return + + _BACKEND_LIBS_LOADED = True + + runtime_dir = Path(runtime_lib._name).resolve().parent + for runtime_lib_name in _backend_runtime_lib_names(): + target_name = f"tvm_runtime_{runtime_lib_name}" + try: + loaded_libs[target_name] = load_lib_ctypes( + package="tvm", + target_name=target_name, + mode="RTLD_GLOBAL", + extra_lib_paths=[runtime_dir], + ) + except (OSError, FileNotFoundError, RuntimeError): + pass + + +def _backend_runtime_lib_names() -> tuple[str, ...]: + runtime_libs = [] + for backend in _BUILTIN_BACKENDS: + module = import_module(f"tvm.backend.{backend}") + runtime_libs.extend(getattr(module, "RUNTIME_LIBS", ())) + runtime_libs.extend(_LEGACY_RUNTIME_LIBS_WITHOUT_BACKEND_PACKAGE) + return tuple(runtime_libs) + + +def load_all() -> None: + """Load all in-tree backend Python hooks.""" + from . import load # pylint: disable=import-outside-toplevel + + for name in _BUILTIN_BACKENDS: + load(name) + return None + + +def _autoload_backends() -> None: + """Discover and invoke out-of-tree backends registered via entry points.""" + global _AUTO_LOAD_DONE + if _AUTO_LOAD_DONE: + return + _AUTO_LOAD_DONE = True + + if os.environ.get("TVM_DEVICE_BACKEND_AUTOLOAD", "1") == "0": + return + + for entry_pt in entry_points(group="tvm.backends"): + try: + entry_pt.load()() + except Exception as e: # pylint: disable=broad-except + warnings.warn(f"Failed to autoload tvm backend '{entry_pt.name}': {e}") diff --git a/python/tvm/backend/cuda/__init__.py b/python/tvm/backend/cuda/__init__.py index 34592d2de3e7..92b23747e89a 100644 --- a/python/tvm/backend/cuda/__init__.py +++ b/python/tvm/backend/cuda/__init__.py @@ -19,12 +19,29 @@ from importlib import import_module _LAZY_SUBMODULES = {"lang", "op", "operator", "script", "target_tags"} +RUNTIME_LIBS = ("cuda",) + + +def _detect_target_from_device(dev): + from tvm.target import Target # pylint: disable=import-outside-toplevel + + return Target( + { + "kind": "cuda", + "max_shared_memory_per_block": dev.max_shared_memory_per_block, + "max_threads_per_block": dev.max_threads_per_block, + "thread_warp_size": dev.warp_size, + "arch": "sm_" + dev.compute_version.replace(".", ""), + } + ) def register_backend(): """Register CUDA-owned Python semantics.""" + from tvm.target.detect_target import register_device_target_detector from tvm.tirx.script.builder import ir as builder_ir # pylint: disable=import-outside-toplevel + register_device_target_detector("cuda", _detect_target_from_device) for name, namespace in script_namespaces().items(): builder_ir.register_script_namespace(name, namespace) @@ -64,6 +81,7 @@ def __getattr__(name: str): "op", "operator", "register_backend", + "RUNTIME_LIBS", "script", "script_namespace", "script_namespaces", diff --git a/python/tvm/backend/hexagon/__init__.py b/python/tvm/backend/hexagon/__init__.py index 3852e36ccc3b..d625cc3671d0 100644 --- a/python/tvm/backend/hexagon/__init__.py +++ b/python/tvm/backend/hexagon/__init__.py @@ -19,6 +19,7 @@ from importlib import import_module _LAZY_SUBMODULES = {"target_tags"} +RUNTIME_LIBS = ("hexagon",) def register_backend(): @@ -32,4 +33,4 @@ def __getattr__(name: str): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") -__all__ = ["register_backend", "target_tags"] +__all__ = ["register_backend", "RUNTIME_LIBS", "target_tags"] diff --git a/python/tvm/backend/metal/__init__.py b/python/tvm/backend/metal/__init__.py index d42806433f73..d464b5519142 100644 --- a/python/tvm/backend/metal/__init__.py +++ b/python/tvm/backend/metal/__init__.py @@ -19,12 +19,28 @@ from importlib import import_module _LAZY_SUBMODULES = {"op", "script", "target_tags"} +RUNTIME_LIBS = ("metal",) + + +def _detect_target_from_device(dev): + from tvm.target import Target # pylint: disable=import-outside-toplevel + + return Target( + { + "kind": "metal", + "max_shared_memory_per_block": 32768, + "max_threads_per_block": dev.max_threads_per_block, + "thread_warp_size": dev.warp_size, + } + ) def register_backend(): """Register Metal-owned Python semantics.""" + from tvm.target.detect_target import register_device_target_detector from tvm.tirx.script.builder import ir as builder_ir # pylint: disable=import-outside-toplevel + register_device_target_detector("metal", _detect_target_from_device) for name, namespace in script_namespaces().items(): builder_ir.register_script_namespace(name, namespace) import_module(f"{__name__}.target_tags") @@ -51,6 +67,7 @@ def __getattr__(name: str): __all__ = [ "op", "register_backend", + "RUNTIME_LIBS", "script", "script_namespace", "script_namespaces", diff --git a/python/tvm/backend/opencl/__init__.py b/python/tvm/backend/opencl/__init__.py index a80696e5900b..e46884e6d12c 100644 --- a/python/tvm/backend/opencl/__init__.py +++ b/python/tvm/backend/opencl/__init__.py @@ -16,10 +16,28 @@ # under the License. """OpenCL-owned backend hooks.""" +RUNTIME_LIBS = ("opencl",) + + +def _detect_target_from_device(dev): + from tvm.target import Target # pylint: disable=import-outside-toplevel + + return Target( + { + "kind": "opencl", + "max_shared_memory_per_block": dev.max_shared_memory_per_block, + "max_threads_per_block": dev.max_threads_per_block, + "thread_warp_size": dev.warp_size, + } + ) + def register_backend(): """Register OpenCL-owned Python semantics.""" + from tvm.target.detect_target import register_device_target_detector + + register_device_target_detector("opencl", _detect_target_from_device) return None -__all__ = ["register_backend"] +__all__ = ["register_backend", "RUNTIME_LIBS"] diff --git a/python/tvm/backend/rocm/__init__.py b/python/tvm/backend/rocm/__init__.py index d7574e974a30..f2f4794da290 100644 --- a/python/tvm/backend/rocm/__init__.py +++ b/python/tvm/backend/rocm/__init__.py @@ -16,10 +16,29 @@ # under the License. """ROCm-owned TIRx modules.""" +RUNTIME_LIBS = ("rocm",) + + +def _detect_target_from_device(dev): + from tvm.target import Target # pylint: disable=import-outside-toplevel + + return Target( + { + "kind": "rocm", + "mtriple": "amdgcn-amd-amdhsa-hcc", + "max_shared_memory_per_block": dev.max_shared_memory_per_block, + "max_threads_per_block": dev.max_threads_per_block, + "thread_warp_size": dev.warp_size, + } + ) + def register_backend(): """Register ROCm-owned Python semantics.""" + from tvm.target.detect_target import register_device_target_detector + + register_device_target_detector("rocm", _detect_target_from_device) return None -__all__ = ["register_backend"] +__all__ = ["register_backend", "RUNTIME_LIBS"] diff --git a/python/tvm/backend/vulkan/__init__.py b/python/tvm/backend/vulkan/__init__.py index 343875aa8a02..eeaa40082c5d 100644 --- a/python/tvm/backend/vulkan/__init__.py +++ b/python/tvm/backend/vulkan/__init__.py @@ -16,10 +16,39 @@ # under the License. """Vulkan-owned backend hooks.""" +RUNTIME_LIBS = ("vulkan",) + + +def _detect_target_from_device(dev): + from tvm import get_global_func # pylint: disable=import-outside-toplevel + from tvm.target import Target # pylint: disable=import-outside-toplevel + + f_get_target_property = get_global_func("device_api.vulkan.get_target_property") + return Target( + { + "kind": "vulkan", + "max_threads_per_block": dev.max_threads_per_block, + "max_shared_memory_per_block": dev.max_shared_memory_per_block, + "thread_warp_size": dev.warp_size, + "supports_float16": f_get_target_property(dev, "supports_float16"), + "supports_int8": f_get_target_property(dev, "supports_int8"), + "supports_int16": f_get_target_property(dev, "supports_int16"), + "supports_int64": f_get_target_property(dev, "supports_int64"), + "supports_8bit_buffer": f_get_target_property(dev, "supports_8bit_buffer"), + "supports_16bit_buffer": f_get_target_property(dev, "supports_16bit_buffer"), + "supports_storage_buffer_storage_class": f_get_target_property( + dev, "supports_storage_buffer_storage_class" + ), + } + ) + def register_backend(): """Register Vulkan-owned Python semantics.""" + from tvm.target.detect_target import register_device_target_detector + + register_device_target_detector("vulkan", _detect_target_from_device) return None -__all__ = ["register_backend"] +__all__ = ["register_backend", "RUNTIME_LIBS"] diff --git a/python/tvm/base.py b/python/tvm/base.py index 5c1e75566e18..e850f9c214d1 100644 --- a/python/tvm/base.py +++ b/python/tvm/base.py @@ -19,7 +19,6 @@ """Base library for TVM.""" import os -from pathlib import Path from tvm_ffi.libinfo import load_lib_ctypes @@ -40,36 +39,12 @@ _LOADED_LIBS = {} -def load_backend_libs(runtime_lib_path: str) -> None: - """Load each known backend runtime DSO into ``_LOADED_LIBS``; failures are silent.""" - # Known per-backend runtime DSOs that, when present, are loaded with - # RTLD_GLOBAL so their static initializers register the device backend. - backend_runtime_libs = ["cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra"] - runtime_dir = Path(runtime_lib_path).resolve().parent - for backend in backend_runtime_libs: - target_name = f"tvm_runtime_{backend}" - try: - _LOADED_LIBS[target_name] = load_lib_ctypes( - package="tvm", - target_name=target_name, - mode="RTLD_GLOBAL", - extra_lib_paths=[runtime_dir], - ) - except (OSError, FileNotFoundError, RuntimeError): - pass - - # runtime is loaded RTLD_GLOBAL to expose its symbols to subsequent loads; # compiler is loaded RTLD_LOCAL. _LOADED_LIBS["tvm_runtime"] = load_lib_ctypes( "tvm", "tvm_runtime", "RTLD_GLOBAL", extra_lib_paths=libinfo.package_lib_paths() ) -# After libtvm_runtime.so is in the global symbol namespace, scan the same -# directory for per-backend DSOs (libtvm_runtime_cuda.so, etc.) and load each -# with RTLD_GLOBAL so their static initializers register device backends. -load_backend_libs(_LOADED_LIBS["tvm_runtime"]._name) - if not _RUNTIME_ONLY: try: _LOADED_LIBS["tvm_compiler"] = load_lib_ctypes( diff --git a/python/tvm/target/detect_target.py b/python/tvm/target/detect_target.py index 81accfed1287..e7c434af6160 100644 --- a/python/tvm/target/detect_target.py +++ b/python/tvm/target/detect_target.py @@ -16,79 +16,14 @@ # under the License. """Detect target.""" +from collections.abc import Callable + from tvm_ffi import get_global_func from ..runtime import Device, device from . import Target -def _detect_metal(dev: Device) -> Target: - return Target( - { - "kind": "metal", - "max_shared_memory_per_block": 32768, - "max_threads_per_block": dev.max_threads_per_block, - "thread_warp_size": dev.warp_size, - } - ) - - -def _detect_cuda(dev: Device) -> Target: - return Target( - { - "kind": "cuda", - "max_shared_memory_per_block": dev.max_shared_memory_per_block, - "max_threads_per_block": dev.max_threads_per_block, - "thread_warp_size": dev.warp_size, - "arch": "sm_" + dev.compute_version.replace(".", ""), - } - ) - - -def _detect_rocm(dev: Device) -> Target: - return Target( - { - "kind": "rocm", - "mtriple": "amdgcn-amd-amdhsa-hcc", - "max_shared_memory_per_block": dev.max_shared_memory_per_block, - "max_threads_per_block": dev.max_threads_per_block, - "thread_warp_size": dev.warp_size, - } - ) - - -def _detect_opencl(dev: Device) -> Target: - return Target( - { - "kind": "opencl", - "max_shared_memory_per_block": dev.max_shared_memory_per_block, - "max_threads_per_block": dev.max_threads_per_block, - "thread_warp_size": dev.warp_size, - } - ) - - -def _detect_vulkan(dev: Device) -> Target: - f_get_target_property = get_global_func("device_api.vulkan.get_target_property") - return Target( - { - "kind": "vulkan", - "max_threads_per_block": dev.max_threads_per_block, - "max_shared_memory_per_block": dev.max_shared_memory_per_block, - "thread_warp_size": dev.warp_size, - "supports_float16": f_get_target_property(dev, "supports_float16"), - "supports_int8": f_get_target_property(dev, "supports_int8"), - "supports_int16": f_get_target_property(dev, "supports_int16"), - "supports_int64": f_get_target_property(dev, "supports_int64"), - "supports_8bit_buffer": f_get_target_property(dev, "supports_8bit_buffer"), - "supports_16bit_buffer": f_get_target_property(dev, "supports_16bit_buffer"), - "supports_storage_buffer_storage_class": f_get_target_property( - dev, "supports_storage_buffer_storage_class" - ), - } - ) - - def _detect_cpu(dev: Device) -> Target: # pylint: disable=unused-argument """Detect the host CPU architecture.""" return Target( @@ -106,6 +41,19 @@ def _detect_cpu(dev: Device) -> Target: # pylint: disable=unused-argument ) +SUPPORTED_DEVICE: dict[str, Callable[[Device], Target]] = { + "cpu": _detect_cpu, +} + +# Backward-compatible alias for the previous private module-level map. +SUPPORT_DEVICE = SUPPORTED_DEVICE + + +def register_device_target_detector(device_type: str, detector: Callable[[Device], Target]) -> None: + """Register target detection for a runtime device type.""" + SUPPORTED_DEVICE[device_type] = detector + + def detect_target_from_device(dev: str | Device) -> Target: """Detects Target associated with the given device. If the device does not exist, there will be an Error. @@ -114,7 +62,7 @@ def detect_target_from_device(dev: str | Device) -> Target: ---------- dev : Union[str, Device] The device to detect the target for. - Supported device types: ["cuda", "metal", "rocm", "vulkan", "opencl"] + Supported device types are registered by backend hooks. Returns ------- @@ -124,24 +72,14 @@ def detect_target_from_device(dev: str | Device) -> Target: if isinstance(dev, str): dev = device(dev) device_type = Device._DEVICE_TYPE_TO_NAME[dev.dlpack_device_type()] - if device_type not in SUPPORT_DEVICE: + if device_type not in SUPPORTED_DEVICE: raise ValueError( f"Auto detection for device `{device_type}` is not supported. " - f"Currently only supports: {SUPPORT_DEVICE.keys()}" + f"Currently only supports: {SUPPORTED_DEVICE.keys()}" ) if not dev.exist: raise ValueError( f"Cannot detect device `{dev}`. Please make sure the device and its driver " "is installed properly, and TVM is compiled with the driver" ) - return SUPPORT_DEVICE[device_type](dev) - - -SUPPORT_DEVICE = { - "cpu": _detect_cpu, - "cuda": _detect_cuda, - "metal": _detect_metal, - "vulkan": _detect_vulkan, - "rocm": _detect_rocm, - "opencl": _detect_opencl, -} + return SUPPORTED_DEVICE[device_type](dev) diff --git a/python/tvm/target/x86.py b/python/tvm/target/x86.py deleted file mode 100644 index 80399c749bd8..000000000000 --- a/python/tvm/target/x86.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Common x86 related utilities""" - -from tvm_ffi import register_global_func - -from .codegen import target_has_features - - -@register_global_func("tvm.topi.x86.utils.get_simd_32bit_lanes") -def get_simd_32bit_lanes(): - """X86 SIMD optimal vector length lookup. - Parameters - ---------- - Returns - ------- - vec_len : int - The optimal vector length of CPU from the global context target. - """ - vec_len = 4 - if target_has_features(["avx512bw", "avx512f"]): - vec_len = 16 - elif target_has_features("avx2"): - vec_len = 8 - return vec_len