From 3eb27d41d37952749b7d019e6e84959a7e35e804 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Sun, 14 Jun 2026 19:21:41 +0000
Subject: [PATCH 1/4] [REFACTOR][PYTHON] Consolidate backend autoload infra

Backend loading is easier to reason about when library discovery, in-tree backend hook loading, and out-of-tree entry point autoload all live under the backend namespace.

This moves the implementations into tvm.backend._autoload_backends while preserving compatibility routes for the previous top-level autoload helper and tvm.base.load_backend_libs.
---
 python/tvm/__init__.py                   |  4 +-
 python/tvm/_autoload_backends.py         | 26 +------
 python/tvm/backend/__init__.py           | 23 +-----
 python/tvm/backend/_autoload_backends.py | 90 ++++++++++++++++++++++++
 python/tvm/base.py                       | 22 ++----
 5 files changed, 102 insertions(+), 63 deletions(-)
 create mode 100644 python/tvm/backend/_autoload_backends.py

diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
index b38e86a0ae69..01767e193fbc 100644
--- a/python/tvm/__init__.py
+++ b/python/tvm/__init__.py
@@ -121,7 +121,7 @@ def wrapper(exctype, value, trbk):
 # Autoload out-of-tree backends registered under the ``tvm.backends`` entry
 # point group. Runs last, after the core runtime and the tvm namespace are
 # fully initialized, so an extension can safely register into ``tvm.*`` and
-# load extra libraries. Imported lazily here to avoid any import-cycle risk.
-from ._autoload_backends import _autoload_backends
+# load extra libraries.
+from .backend._autoload_backends import _autoload_backends
 
 _autoload_backends()
diff --git a/python/tvm/_autoload_backends.py b/python/tvm/_autoload_backends.py
index b45ac59d9da2..cf64719f1343 100644
--- a/python/tvm/_autoload_backends.py
+++ b/python/tvm/_autoload_backends.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""Autoload out-of-tree backends registered via ``tvm.backends`` entry points.
+"""Compatibility route for backend autoload infrastructure.
 
 Out-of-tree extensions opt into being loaded automatically at ``import tvm``
 time by declaring an entry point in the ``tvm.backends`` group::
@@ -25,26 +25,6 @@
 Autoload can be disabled via ``TVM_DEVICE_BACKEND_AUTOLOAD=0``.
 """
 
-import os
-import warnings
-from importlib.metadata import entry_points
+from .backend._autoload_backends import _autoload_backends
 
-# Guard so autoload runs at most once per process, even if invoked again.
-_AUTO_LOAD_DONE = False
-
-
-def _autoload_backends():
-    """Discover and invoke out-of-tree backends registered via entry points."""
-    global _AUTO_LOAD_DONE
-    if _AUTO_LOAD_DONE:
-        return
-    _AUTO_LOAD_DONE = True
-
-    if os.environ.get("TVM_DEVICE_BACKEND_AUTOLOAD", "1") == "0":
-        return
-
-    for entry_pt in entry_points(group="tvm.backends"):
-        try:
-            entry_pt.load()()
-        except Exception as e:  # pylint: disable=broad-except
-            warnings.warn(f"Failed to autoload tvm backend '{entry_pt.name}': {e}")
+__all__ = ["_autoload_backends"]
diff --git a/python/tvm/backend/__init__.py b/python/tvm/backend/__init__.py
index 2243f5e03410..1085c538d784 100644
--- a/python/tvm/backend/__init__.py
+++ b/python/tvm/backend/__init__.py
@@ -25,19 +25,10 @@
 from pkgutil import extend_path
 from typing import Any
 
+from ._autoload_backends import load_all, load_backend_libs
+
 __path__ = extend_path(__path__, __name__)  # type: ignore[name-defined]
 
-_BUILTIN_BACKENDS = (
-    "cuda",
-    "metal",
-    "rocm",
-    "trn",
-    "opencl",
-    "vulkan",
-    "webgpu",
-    "hexagon",
-    "adreno",
-)
 _LOADED_BACKENDS: dict[str, Any] = {}
 
 
@@ -192,18 +183,10 @@ def load(name: str) -> None:
     return None
 
 
-def load_all() -> None:
-    """Load all in-tree backend Python hooks."""
-
-    for name in _BUILTIN_BACKENDS:
-        load(name)
-    return None
-
-
 def is_loaded(name: str) -> bool:
     """Return whether a backend has been loaded."""
 
     return name in _LOADED_BACKENDS
 
 
-__all__ = ["is_loaded", "load", "load_all"]
+__all__ = ["is_loaded", "load", "load_all", "load_backend_libs"]
diff --git a/python/tvm/backend/_autoload_backends.py b/python/tvm/backend/_autoload_backends.py
new file mode 100644
index 000000000000..5d8e181df549
--- /dev/null
+++ b/python/tvm/backend/_autoload_backends.py
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Autoload backend libraries and Python backend registration hooks."""
+
+from __future__ import annotations
+
+import os
+import warnings
+from importlib.metadata import entry_points
+from pathlib import Path
+from typing import Any
+
+from tvm_ffi.libinfo import load_lib_ctypes
+
+_BUILTIN_BACKENDS = (
+    "cuda",
+    "metal",
+    "rocm",
+    "trn",
+    "opencl",
+    "vulkan",
+    "webgpu",
+    "hexagon",
+    "adreno",
+)
+_BACKEND_RUNTIME_LIBS = ("cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra")
+
+# Guard so autoload runs at most once per process, even if invoked again.
+_AUTO_LOAD_DONE = False
+
+
+def load_backend_libs(runtime_lib_path: str, loaded_libs: dict[str, Any] | None = None) -> None:
+    """Load each known backend runtime DSO into the process-global symbol namespace."""
+    if loaded_libs is None:
+        from tvm.base import _LOADED_LIBS  # pylint: disable=import-outside-toplevel
+
+        loaded_libs = _LOADED_LIBS
+
+    runtime_dir = Path(runtime_lib_path).resolve().parent
+    for backend in _BACKEND_RUNTIME_LIBS:
+        target_name = f"tvm_runtime_{backend}"
+        try:
+            loaded_libs[target_name] = load_lib_ctypes(
+                package="tvm",
+                target_name=target_name,
+                mode="RTLD_GLOBAL",
+                extra_lib_paths=[runtime_dir],
+            )
+        except (OSError, FileNotFoundError, RuntimeError):
+            pass
+
+
+def load_all() -> None:
+    """Load all in-tree backend Python hooks."""
+    from . import load  # pylint: disable=import-outside-toplevel
+
+    for name in _BUILTIN_BACKENDS:
+        load(name)
+    return None
+
+
+def _autoload_backends() -> None:
+    """Invoke each ``tvm.backends`` entry point at most once per process."""
+    global _AUTO_LOAD_DONE
+    if _AUTO_LOAD_DONE:
+        return
+    _AUTO_LOAD_DONE = True  # set up front: later calls return early even if disabled below
+
+    if os.environ.get("TVM_DEVICE_BACKEND_AUTOLOAD", "1") == "0":
+        return
+
+    for entry_pt in entry_points(group="tvm.backends"):
+        try:
+            entry_pt.load()()  # resolve the entry point, then call the object it names
+        except Exception as e:  # pylint: disable=broad-except
+            warnings.warn(f"Failed to autoload tvm backend '{entry_pt.name}': {e}")
diff --git a/python/tvm/base.py b/python/tvm/base.py
index 5c1e75566e18..cfa7d070c32d 100644
--- a/python/tvm/base.py
+++ b/python/tvm/base.py
@@ -19,11 +19,11 @@
 """Base library for TVM."""
 
 import os
-from pathlib import Path
 
 from tvm_ffi.libinfo import load_lib_ctypes
 
 from . import libinfo
+from .backend._autoload_backends import load_backend_libs as _load_backend_libs
 
 # ----------------------------
 # library loading
@@ -41,22 +41,8 @@
 
 
 def load_backend_libs(runtime_lib_path: str) -> None:
-    """Load each known backend runtime DSO into ``_LOADED_LIBS``; failures are silent."""
-    # Known per-backend runtime DSOs that, when present, are loaded with
-    # RTLD_GLOBAL so their static initializers register the device backend.
-    backend_runtime_libs = ["cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra"]
-    runtime_dir = Path(runtime_lib_path).resolve().parent
-    for backend in backend_runtime_libs:
-        target_name = f"tvm_runtime_{backend}"
-        try:
-            _LOADED_LIBS[target_name] = load_lib_ctypes(
-                package="tvm",
-                target_name=target_name,
-                mode="RTLD_GLOBAL",
-                extra_lib_paths=[runtime_dir],
-            )
-        except (OSError, FileNotFoundError, RuntimeError):
-            pass
+    """Load backend runtime DSOs through ``tvm.backend`` autoload infrastructure."""
+    _load_backend_libs(runtime_lib_path, _LOADED_LIBS)
 
 
 # runtime is loaded RTLD_GLOBAL to expose its symbols to subsequent loads;
@@ -68,7 +54,7 @@ def load_backend_libs(runtime_lib_path: str) -> None:
 # After libtvm_runtime.so is in the global symbol namespace, scan the same
 # directory for per-backend DSOs (libtvm_runtime_cuda.so, etc.) and load each
 # with RTLD_GLOBAL so their static initializers register device backends.
-load_backend_libs(_LOADED_LIBS["tvm_runtime"]._name)
+_load_backend_libs(_LOADED_LIBS["tvm_runtime"]._name, _LOADED_LIBS)
 
 if not _RUNTIME_ONLY:
     try:

From 0dee5bd53030c63bb0506171b970802de3fe0b01 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Sun, 14 Jun 2026 19:26:59 +0000
Subject: [PATCH 2/4] [REFACTOR][PYTHON] Move target detection hooks to
 backends

Backend-specific target detection belongs with the backend hook registration path instead of a central target-side device map.

This keeps CPU detection in the generic target module, lets backend hooks register their own device detectors, and removes the unused x86 SIMD lane global hook.
---
 python/tvm/backend/cuda/__init__.py   |  16 +++++
 python/tvm/backend/metal/__init__.py  |  15 ++++
 python/tvm/backend/opencl/__init__.py |  16 +++++
 python/tvm/backend/rocm/__init__.py   |  17 +++++
 python/tvm/backend/vulkan/__init__.py |  27 +++++++
 python/tvm/target/detect_target.py    | 100 +++++---------------------
 python/tvm/target/x86.py              |  39 ----------
 7 files changed, 110 insertions(+), 120 deletions(-)
 delete mode 100644 python/tvm/target/x86.py

diff --git a/python/tvm/backend/cuda/__init__.py b/python/tvm/backend/cuda/__init__.py
index 34592d2de3e7..60c34b0e63f3 100644
--- a/python/tvm/backend/cuda/__init__.py
+++ b/python/tvm/backend/cuda/__init__.py
@@ -21,10 +21,26 @@
 _LAZY_SUBMODULES = {"lang", "op", "operator", "script", "target_tags"}
 
 
+def _detect_target_from_device(dev):
+    from tvm.target import Target  # pylint: disable=import-outside-toplevel
+
+    return Target(
+        {
+            "kind": "cuda",
+            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
+            "max_threads_per_block": dev.max_threads_per_block,
+            "thread_warp_size": dev.warp_size,
+            "arch": "sm_" + dev.compute_version.replace(".", ""),  # e.g. "8.6" -> "sm_86"
+        }
+    )
+
+
 def register_backend():
     """Register CUDA-owned Python semantics."""
+    from tvm.target.detect_target import register_device_target_detector
     from tvm.tirx.script.builder import ir as builder_ir  # pylint: disable=import-outside-toplevel
 
+    register_device_target_detector("cuda", _detect_target_from_device)
     for name, namespace in script_namespaces().items():
         builder_ir.register_script_namespace(name, namespace)
 
diff --git a/python/tvm/backend/metal/__init__.py b/python/tvm/backend/metal/__init__.py
index d42806433f73..044807db4be4 100644
--- a/python/tvm/backend/metal/__init__.py
+++ b/python/tvm/backend/metal/__init__.py
@@ -21,10 +21,25 @@
 _LAZY_SUBMODULES = {"op", "script", "target_tags"}
 
 
+def _detect_target_from_device(dev):
+    from tvm.target import Target  # pylint: disable=import-outside-toplevel
+
+    return Target(
+        {
+            "kind": "metal",
+            "max_shared_memory_per_block": 32768,  # literal value, not read from ``dev``
+            "max_threads_per_block": dev.max_threads_per_block,
+            "thread_warp_size": dev.warp_size,
+        }
+    )
+
 def register_backend():
     """Register Metal-owned Python semantics."""
+    from tvm.target.detect_target import register_device_target_detector
     from tvm.tirx.script.builder import ir as builder_ir  # pylint: disable=import-outside-toplevel
 
+    register_device_target_detector("metal", _detect_target_from_device)
     for name, namespace in script_namespaces().items():
         builder_ir.register_script_namespace(name, namespace)
     import_module(f"{__name__}.target_tags")
diff --git a/python/tvm/backend/opencl/__init__.py b/python/tvm/backend/opencl/__init__.py
index a80696e5900b..c4b4cd71c2c2 100644
--- a/python/tvm/backend/opencl/__init__.py
+++ b/python/tvm/backend/opencl/__init__.py
@@ -17,8 +17,24 @@
 """OpenCL-owned backend hooks."""
 
 
+def _detect_target_from_device(dev):
+    from tvm.target import Target  # pylint: disable=import-outside-toplevel
+
+    return Target(
+        {
+            "kind": "opencl",
+            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
+            "max_threads_per_block": dev.max_threads_per_block,
+            "thread_warp_size": dev.warp_size,
+        }
+    )
+
+
 def register_backend():
     """Register OpenCL-owned Python semantics."""
+    from tvm.target.detect_target import register_device_target_detector
+
+    register_device_target_detector("opencl", _detect_target_from_device)
     return None
 
 
diff --git a/python/tvm/backend/rocm/__init__.py b/python/tvm/backend/rocm/__init__.py
index d7574e974a30..35eea6972b94 100644
--- a/python/tvm/backend/rocm/__init__.py
+++ b/python/tvm/backend/rocm/__init__.py
@@ -17,8 +17,25 @@
 """ROCm-owned TIRx modules."""
 
 
+def _detect_target_from_device(dev):
+    from tvm.target import Target  # pylint: disable=import-outside-toplevel
+
+    return Target(
+        {
+            "kind": "rocm",
+            "mtriple": "amdgcn-amd-amdhsa-hcc",
+            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
+            "max_threads_per_block": dev.max_threads_per_block,
+            "thread_warp_size": dev.warp_size,
+        }
+    )
+
+
 def register_backend():
     """Register ROCm-owned Python semantics."""
+    from tvm.target.detect_target import register_device_target_detector
+
+    register_device_target_detector("rocm", _detect_target_from_device)
     return None
 
 
diff --git a/python/tvm/backend/vulkan/__init__.py b/python/tvm/backend/vulkan/__init__.py
index 343875aa8a02..b973e331181b 100644
--- a/python/tvm/backend/vulkan/__init__.py
+++ b/python/tvm/backend/vulkan/__init__.py
@@ -17,8 +17,35 @@
 """Vulkan-owned backend hooks."""
 
 
+def _detect_target_from_device(dev):
+    from tvm import get_global_func  # pylint: disable=import-outside-toplevel
+    from tvm.target import Target  # pylint: disable=import-outside-toplevel
+
+    f_get_target_property = get_global_func("device_api.vulkan.get_target_property")
+    return Target(
+        {
+            "kind": "vulkan",
+            "max_threads_per_block": dev.max_threads_per_block,
+            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
+            "thread_warp_size": dev.warp_size,
+            "supports_float16": f_get_target_property(dev, "supports_float16"),
+            "supports_int8": f_get_target_property(dev, "supports_int8"),
+            "supports_int16": f_get_target_property(dev, "supports_int16"),
+            "supports_int64": f_get_target_property(dev, "supports_int64"),
+            "supports_8bit_buffer": f_get_target_property(dev, "supports_8bit_buffer"),
+            "supports_16bit_buffer": f_get_target_property(dev, "supports_16bit_buffer"),
+            "supports_storage_buffer_storage_class": f_get_target_property(
+                dev, "supports_storage_buffer_storage_class"
+            ),
+        }
+    )
+
+
 def register_backend():
     """Register Vulkan-owned Python semantics."""
+    from tvm.target.detect_target import register_device_target_detector
+
+    register_device_target_detector("vulkan", _detect_target_from_device)
     return None
 
 
diff --git a/python/tvm/target/detect_target.py b/python/tvm/target/detect_target.py
index 81accfed1287..e7c434af6160 100644
--- a/python/tvm/target/detect_target.py
+++ b/python/tvm/target/detect_target.py
@@ -16,79 +16,14 @@
 # under the License.
 """Detect target."""
 
+from collections.abc import Callable
+
 from tvm_ffi import get_global_func
 
 from ..runtime import Device, device
 from . import Target
 
 
-def _detect_metal(dev: Device) -> Target:
-    return Target(
-        {
-            "kind": "metal",
-            "max_shared_memory_per_block": 32768,
-            "max_threads_per_block": dev.max_threads_per_block,
-            "thread_warp_size": dev.warp_size,
-        }
-    )
-
-
-def _detect_cuda(dev: Device) -> Target:
-    return Target(
-        {
-            "kind": "cuda",
-            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
-            "max_threads_per_block": dev.max_threads_per_block,
-            "thread_warp_size": dev.warp_size,
-            "arch": "sm_" + dev.compute_version.replace(".", ""),
-        }
-    )
-
-
-def _detect_rocm(dev: Device) -> Target:
-    return Target(
-        {
-            "kind": "rocm",
-            "mtriple": "amdgcn-amd-amdhsa-hcc",
-            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
-            "max_threads_per_block": dev.max_threads_per_block,
-            "thread_warp_size": dev.warp_size,
-        }
-    )
-
-
-def _detect_opencl(dev: Device) -> Target:
-    return Target(
-        {
-            "kind": "opencl",
-            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
-            "max_threads_per_block": dev.max_threads_per_block,
-            "thread_warp_size": dev.warp_size,
-        }
-    )
-
-
-def _detect_vulkan(dev: Device) -> Target:
-    f_get_target_property = get_global_func("device_api.vulkan.get_target_property")
-    return Target(
-        {
-            "kind": "vulkan",
-            "max_threads_per_block": dev.max_threads_per_block,
-            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
-            "thread_warp_size": dev.warp_size,
-            "supports_float16": f_get_target_property(dev, "supports_float16"),
-            "supports_int8": f_get_target_property(dev, "supports_int8"),
-            "supports_int16": f_get_target_property(dev, "supports_int16"),
-            "supports_int64": f_get_target_property(dev, "supports_int64"),
-            "supports_8bit_buffer": f_get_target_property(dev, "supports_8bit_buffer"),
-            "supports_16bit_buffer": f_get_target_property(dev, "supports_16bit_buffer"),
-            "supports_storage_buffer_storage_class": f_get_target_property(
-                dev, "supports_storage_buffer_storage_class"
-            ),
-        }
-    )
-
-
 def _detect_cpu(dev: Device) -> Target:  # pylint: disable=unused-argument
     """Detect the host CPU architecture."""
     return Target(
@@ -106,6 +41,19 @@ def _detect_cpu(dev: Device) -> Target:  # pylint: disable=unused-argument
     )
 
 
+SUPPORTED_DEVICE: dict[str, Callable[[Device], Target]] = {
+    "cpu": _detect_cpu,
+}
+
+# Backward-compatible alias: this map was previously exposed as ``SUPPORT_DEVICE``.
+SUPPORT_DEVICE = SUPPORTED_DEVICE
+
+
+def register_device_target_detector(device_type: str, detector: Callable[[Device], Target]) -> None:
+    """Register ``detector`` for ``device_type``, replacing any existing detector."""
+    SUPPORTED_DEVICE[device_type] = detector
+
+
 def detect_target_from_device(dev: str | Device) -> Target:
     """Detects Target associated with the given device. If the device does not exist,
     there will be an Error.
@@ -114,7 +62,7 @@ def detect_target_from_device(dev: str | Device) -> Target:
     ----------
     dev : Union[str, Device]
         The device to detect the target for.
-        Supported device types: ["cuda", "metal", "rocm", "vulkan", "opencl"]
+        Supported device types are registered by backend hooks.
 
     Returns
     -------
@@ -124,24 +72,14 @@ def detect_target_from_device(dev: str | Device) -> Target:
     if isinstance(dev, str):
         dev = device(dev)
     device_type = Device._DEVICE_TYPE_TO_NAME[dev.dlpack_device_type()]
-    if device_type not in SUPPORT_DEVICE:
+    if device_type not in SUPPORTED_DEVICE:
         raise ValueError(
             f"Auto detection for device `{device_type}` is not supported. "
-            f"Currently only supports: {SUPPORT_DEVICE.keys()}"
+            f"Currently only supports: {SUPPORTED_DEVICE.keys()}"
         )
     if not dev.exist:
         raise ValueError(
             f"Cannot detect device `{dev}`. Please make sure the device and its driver "
             "is installed properly, and TVM is compiled with the driver"
         )
-    return SUPPORT_DEVICE[device_type](dev)
-
-
-SUPPORT_DEVICE = {
-    "cpu": _detect_cpu,
-    "cuda": _detect_cuda,
-    "metal": _detect_metal,
-    "vulkan": _detect_vulkan,
-    "rocm": _detect_rocm,
-    "opencl": _detect_opencl,
-}
+    return SUPPORTED_DEVICE[device_type](dev)
diff --git a/python/tvm/target/x86.py b/python/tvm/target/x86.py
deleted file mode 100644
index 80399c749bd8..000000000000
--- a/python/tvm/target/x86.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Common x86 related utilities"""
-
-from tvm_ffi import register_global_func
-
-from .codegen import target_has_features
-
-
-@register_global_func("tvm.topi.x86.utils.get_simd_32bit_lanes")
-def get_simd_32bit_lanes():
-    """X86 SIMD optimal vector length lookup.
-    Parameters
-    ----------
-    Returns
-    -------
-     vec_len : int
-        The optimal vector length of CPU from the global context target.
-    """
-    vec_len = 4
-    if target_has_features(["avx512bw", "avx512f"]):
-        vec_len = 16
-    elif target_has_features("avx2"):
-        vec_len = 8
-    return vec_len

From bbf0c4fb197f0c83ff04a41b95784d6ce94e3863 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Sun, 14 Jun 2026 19:41:38 +0000
Subject: [PATCH 3/4] [REFACTOR][PYTHON] Move backend lib autoload out of base

---
 python/tvm/__init__.py                   |  9 +++++----
 python/tvm/backend/__init__.py           |  4 ++--
 python/tvm/backend/_autoload_backends.py | 15 +++++++++++++--
 python/tvm/base.py                       | 11 -----------
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
index 01767e193fbc..26a3b9a2c413 100644
--- a/python/tvm/__init__.py
+++ b/python/tvm/__init__.py
@@ -118,10 +118,11 @@ def wrapper(exctype, value, trbk):
 
 sys.excepthook = tvm_wrap_excepthook(sys.excepthook)
 
-# Autoload out-of-tree backends registered under the ``tvm.backends`` entry
-# point group. Runs last, after the core runtime and the tvm namespace are
-# fully initialized, so an extension can safely register into ``tvm.*`` and
-# load extra libraries.
+# Autoload backend runtime libraries and out-of-tree backends registered under
+# the ``tvm.backends`` entry point group. Runs last, after the core runtime and
+# the tvm namespace are fully initialized, so an extension can safely register
+# into ``tvm.*`` and load extra libraries.
 from .backend._autoload_backends import _autoload_backends
 
+backend.autoload_backend_libs()
 _autoload_backends()
diff --git a/python/tvm/backend/__init__.py b/python/tvm/backend/__init__.py
index 1085c538d784..fe48b4fe6831 100644
--- a/python/tvm/backend/__init__.py
+++ b/python/tvm/backend/__init__.py
@@ -25,7 +25,7 @@
 from pkgutil import extend_path
 from typing import Any
 
-from ._autoload_backends import load_all, load_backend_libs
+from ._autoload_backends import autoload_backend_libs, load_all
 
 __path__ = extend_path(__path__, __name__)  # type: ignore[name-defined]
 
@@ -189,4 +189,4 @@ def is_loaded(name: str) -> bool:
     return name in _LOADED_BACKENDS
 
 
-__all__ = ["is_loaded", "load", "load_all", "load_backend_libs"]
+__all__ = ["autoload_backend_libs", "is_loaded", "load", "load_all"]
diff --git a/python/tvm/backend/_autoload_backends.py b/python/tvm/backend/_autoload_backends.py
index 5d8e181df549..7256cf472d03 100644
--- a/python/tvm/backend/_autoload_backends.py
+++ b/python/tvm/backend/_autoload_backends.py
@@ -40,17 +40,28 @@
 _BACKEND_RUNTIME_LIBS = ("cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra")
 
 # Guard so autoload runs at most once per process, even if invoked again.
+_BACKEND_LIBS_LOADED = False
 _AUTO_LOAD_DONE = False
 
 
-def load_backend_libs(runtime_lib_path: str, loaded_libs: dict[str, Any] | None = None) -> None:
+def autoload_backend_libs(loaded_libs: dict[str, Any] | None = None) -> None:
     """Load each known backend runtime DSO into the process-global symbol namespace."""
+    global _BACKEND_LIBS_LOADED
+    if _BACKEND_LIBS_LOADED:
+        return
+
     if loaded_libs is None:
         from tvm.base import _LOADED_LIBS  # pylint: disable=import-outside-toplevel
 
         loaded_libs = _LOADED_LIBS
 
-    runtime_dir = Path(runtime_lib_path).resolve().parent
+    runtime_lib = loaded_libs.get("tvm_runtime")
+    if runtime_lib is None:
+        return
+
+    _BACKEND_LIBS_LOADED = True
+
+    runtime_dir = Path(runtime_lib._name).resolve().parent
     for backend in _BACKEND_RUNTIME_LIBS:
         target_name = f"tvm_runtime_{backend}"
         try:
diff --git a/python/tvm/base.py b/python/tvm/base.py
index cfa7d070c32d..e850f9c214d1 100644
--- a/python/tvm/base.py
+++ b/python/tvm/base.py
@@ -23,7 +23,6 @@
 from tvm_ffi.libinfo import load_lib_ctypes
 
 from . import libinfo
-from .backend._autoload_backends import load_backend_libs as _load_backend_libs
 
 # ----------------------------
 # library loading
@@ -40,22 +39,12 @@
 _LOADED_LIBS = {}
 
 
-def load_backend_libs(runtime_lib_path: str) -> None:
-    """Load backend runtime DSOs through ``tvm.backend`` autoload infrastructure."""
-    _load_backend_libs(runtime_lib_path, _LOADED_LIBS)
-
-
 # runtime is loaded RTLD_GLOBAL to expose its symbols to subsequent loads;
 # compiler is loaded RTLD_LOCAL.
 _LOADED_LIBS["tvm_runtime"] = load_lib_ctypes(
     "tvm", "tvm_runtime", "RTLD_GLOBAL", extra_lib_paths=libinfo.package_lib_paths()
 )
 
-# After libtvm_runtime.so is in the global symbol namespace, scan the same
-# directory for per-backend DSOs (libtvm_runtime_cuda.so, etc.) and load each
-# with RTLD_GLOBAL so their static initializers register device backends.
-_load_backend_libs(_LOADED_LIBS["tvm_runtime"]._name, _LOADED_LIBS)
-
 if not _RUNTIME_ONLY:
     try:
         _LOADED_LIBS["tvm_compiler"] = load_lib_ctypes(

From b88c88e2b6bc6d62d58304f4314725dea9de7c35 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Sun, 14 Jun 2026 19:44:08 +0000
Subject: [PATCH 4/4] [REFACTOR][PYTHON] Let backends own runtime sidecar names

---
 python/tvm/backend/_autoload_backends.py | 16 +++++++++++++---
 python/tvm/backend/cuda/__init__.py      |  2 ++
 python/tvm/backend/hexagon/__init__.py   |  3 ++-
 python/tvm/backend/metal/__init__.py     |  2 ++
 python/tvm/backend/opencl/__init__.py    |  4 +++-
 python/tvm/backend/rocm/__init__.py      |  4 +++-
 python/tvm/backend/vulkan/__init__.py    |  4 +++-
 7 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/python/tvm/backend/_autoload_backends.py b/python/tvm/backend/_autoload_backends.py
index 7256cf472d03..a7c63c0c4866 100644
--- a/python/tvm/backend/_autoload_backends.py
+++ b/python/tvm/backend/_autoload_backends.py
@@ -20,6 +20,7 @@
 
 import os
 import warnings
+from importlib import import_module
 from importlib.metadata import entry_points
 from pathlib import Path
 from typing import Any
@@ -37,7 +38,7 @@
     "hexagon",
     "adreno",
 )
-_BACKEND_RUNTIME_LIBS = ("cuda", "vulkan", "opencl", "metal", "rocm", "hexagon", "extra")
+_LEGACY_RUNTIME_LIBS_WITHOUT_BACKEND_PACKAGE = ("extra",)
 
 # Guard so autoload runs at most once per process, even if invoked again.
 _BACKEND_LIBS_LOADED = False
@@ -62,8 +63,8 @@ def autoload_backend_libs(loaded_libs: dict[str, Any] | None = None) -> None:
     _BACKEND_LIBS_LOADED = True
 
     runtime_dir = Path(runtime_lib._name).resolve().parent
-    for backend in _BACKEND_RUNTIME_LIBS:
-        target_name = f"tvm_runtime_{backend}"
+    for runtime_lib_name in _backend_runtime_lib_names():
+        target_name = f"tvm_runtime_{runtime_lib_name}"
         try:
             loaded_libs[target_name] = load_lib_ctypes(
                 package="tvm",
@@ -75,6 +76,27 @@ def autoload_backend_libs(loaded_libs: dict[str, Any] | None = None) -> None:
             pass
 
 
+def _backend_runtime_lib_names() -> tuple[str, ...]:
+    """Collect the runtime sidecar library names declared by in-tree backends.
+
+    Each backend package may expose a ``RUNTIME_LIBS`` tuple; packages without
+    the attribute contribute nothing. Legacy sidecars that have no backend
+    package are appended last. Backends that cannot be imported are skipped.
+    """
+    runtime_libs = []
+    for backend in _BUILTIN_BACKENDS:
+        try:
+            module = import_module(f"tvm.backend.{backend}")
+        except ImportError as err:
+            # Sidecar loading is best-effort: a backend package that is absent
+            # or fails to import must not abort ``import tvm``.
+            warnings.warn(f"Failed to import tvm backend '{backend}': {err}")
+            continue
+        runtime_libs.extend(getattr(module, "RUNTIME_LIBS", ()))
+    runtime_libs.extend(_LEGACY_RUNTIME_LIBS_WITHOUT_BACKEND_PACKAGE)
+    return tuple(runtime_libs)
+
+
 def load_all() -> None:
     """Load all in-tree backend Python hooks."""
     from . import load  # pylint: disable=import-outside-toplevel
diff --git a/python/tvm/backend/cuda/__init__.py b/python/tvm/backend/cuda/__init__.py
index 60c34b0e63f3..92b23747e89a 100644
--- a/python/tvm/backend/cuda/__init__.py
+++ b/python/tvm/backend/cuda/__init__.py
@@ -19,6 +19,7 @@
 from importlib import import_module
 
 _LAZY_SUBMODULES = {"lang", "op", "operator", "script", "target_tags"}
+RUNTIME_LIBS = ("cuda",)
 
 
 def _detect_target_from_device(dev):
@@ -80,6 +81,7 @@ def __getattr__(name: str):
     "op",
     "operator",
     "register_backend",
+    "RUNTIME_LIBS",
     "script",
     "script_namespace",
     "script_namespaces",
diff --git a/python/tvm/backend/hexagon/__init__.py b/python/tvm/backend/hexagon/__init__.py
index 3852e36ccc3b..d625cc3671d0 100644
--- a/python/tvm/backend/hexagon/__init__.py
+++ b/python/tvm/backend/hexagon/__init__.py
@@ -19,6 +19,7 @@
 from importlib import import_module
 
 _LAZY_SUBMODULES = {"target_tags"}
+RUNTIME_LIBS = ("hexagon",)
 
 
 def register_backend():
@@ -32,4 +33,4 @@ def __getattr__(name: str):
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
-__all__ = ["register_backend", "target_tags"]
+__all__ = ["register_backend", "RUNTIME_LIBS", "target_tags"]
diff --git a/python/tvm/backend/metal/__init__.py b/python/tvm/backend/metal/__init__.py
index 044807db4be4..d464b5519142 100644
--- a/python/tvm/backend/metal/__init__.py
+++ b/python/tvm/backend/metal/__init__.py
@@ -19,6 +19,7 @@
 from importlib import import_module
 
 _LAZY_SUBMODULES = {"op", "script", "target_tags"}
+RUNTIME_LIBS = ("metal",)
 
 
 def _detect_target_from_device(dev):
@@ -66,6 +67,7 @@ def __getattr__(name: str):
 __all__ = [
     "op",
     "register_backend",
+    "RUNTIME_LIBS",
     "script",
     "script_namespace",
     "script_namespaces",
diff --git a/python/tvm/backend/opencl/__init__.py b/python/tvm/backend/opencl/__init__.py
index c4b4cd71c2c2..e46884e6d12c 100644
--- a/python/tvm/backend/opencl/__init__.py
+++ b/python/tvm/backend/opencl/__init__.py
@@ -16,6 +16,8 @@
 # under the License.
 """OpenCL-owned backend hooks."""
 
+RUNTIME_LIBS = ("opencl",)
+
 
 def _detect_target_from_device(dev):
     from tvm.target import Target  # pylint: disable=import-outside-toplevel
@@ -38,4 +40,4 @@ def register_backend():
     return None
 
 
-__all__ = ["register_backend"]
+__all__ = ["register_backend", "RUNTIME_LIBS"]
diff --git a/python/tvm/backend/rocm/__init__.py b/python/tvm/backend/rocm/__init__.py
index 35eea6972b94..f2f4794da290 100644
--- a/python/tvm/backend/rocm/__init__.py
+++ b/python/tvm/backend/rocm/__init__.py
@@ -16,6 +16,8 @@
 # under the License.
 """ROCm-owned TIRx modules."""
 
+RUNTIME_LIBS = ("rocm",)
+
 
 def _detect_target_from_device(dev):
     from tvm.target import Target  # pylint: disable=import-outside-toplevel
@@ -39,4 +41,4 @@ def register_backend():
     return None
 
 
-__all__ = ["register_backend"]
+__all__ = ["register_backend", "RUNTIME_LIBS"]
diff --git a/python/tvm/backend/vulkan/__init__.py b/python/tvm/backend/vulkan/__init__.py
index b973e331181b..eeaa40082c5d 100644
--- a/python/tvm/backend/vulkan/__init__.py
+++ b/python/tvm/backend/vulkan/__init__.py
@@ -16,6 +16,8 @@
 # under the License.
 """Vulkan-owned backend hooks."""
 
+RUNTIME_LIBS = ("vulkan",)
+
 
 def _detect_target_from_device(dev):
     from tvm import get_global_func  # pylint: disable=import-outside-toplevel
@@ -49,4 +51,4 @@ def register_backend():
     return None
 
 
-__all__ = ["register_backend"]
+__all__ = ["register_backend", "RUNTIME_LIBS"]