|
1 | 1 | """Collection of utilities to detect properties of the underlying architecture.""" |
2 | 2 |
|
| 3 | +from contextlib import suppress |
3 | 4 | from functools import cached_property |
4 | | -from subprocess import PIPE, Popen, DEVNULL, run |
| 5 | +from subprocess import PIPE, Popen, DEVNULL, run, CalledProcessError |
5 | 6 | from pathlib import Path |
6 | 7 | import ctypes |
7 | 8 | import re |
|
11 | 12 |
|
12 | 13 | import cpuinfo |
13 | 14 | import numpy as np |
| 15 | +from packaging.version import parse, InvalidVersion |
14 | 16 | import psutil |
15 | 17 |
|
16 | 18 | from devito.logger import warning |
17 | 19 | from devito.tools import as_tuple, all_equal, memoized_func |
18 | 20 |
|
19 | | -__all__ = ['platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', |
20 | | - 'get_nvidia_cc', 'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', |
21 | | - 'get_m1_llvm_path', 'get_advisor_path', 'Platform', 'Cpu64', 'Intel64', |
22 | | - 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device', 'NvidiaDevice', |
23 | | - 'AmdDevice', 'IntelDevice', |
24 | | - # Brand-agnostic |
25 | | - 'ANYCPU', 'ANYGPU', |
26 | | - # Intel CPUs |
27 | | - 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', |
28 | | - 'SKX', 'KLX', 'CLX', 'CLK', 'SPR', |
29 | | - # AMD CPUs |
30 | | - 'AMD', |
31 | | - # ARM CPUs |
32 | | - 'ARM', 'AppleArm', 'M1', 'M2', 'M3', |
33 | | - 'Graviton', 'GRAVITON2', 'GRAVITON3', 'GRAVITON4', |
34 | | - 'Cortex', 'NvidiaArm', 'GRACE', |
35 | | - # Other legacy CPUs |
36 | | - 'POWER8', 'POWER9', |
37 | | - # Generic GPUs |
38 | | - 'AMDGPUX', 'NVIDIAX', 'INTELGPUX', |
39 | | - # Nvidia GPUs |
40 | | - 'VOLTA', 'AMPERE', 'HOPPER', 'BLACKWELL', |
41 | | - # Intel GPUs |
42 | | - 'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550'] |
| 21 | +__all__ = [ |
| 22 | + 'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', |
| 23 | + 'get_nvidia_cc', 'get_cuda_path', 'get_cuda_version', 'get_hip_path', |
| 24 | + 'check_cuda_runtime', 'get_m1_llvm_path', 'get_advisor_path', 'Platform', |
| 25 | + 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device', |
| 26 | + 'NvidiaDevice', 'AmdDevice', 'IntelDevice', |
| 27 | + # Brand-agnostic |
| 28 | + 'ANYCPU', 'ANYGPU', |
| 29 | + # Intel CPUs |
| 30 | + 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', |
| 31 | + 'SKX', 'KLX', 'CLX', 'CLK', 'SPR', |
| 32 | + # AMD CPUs |
| 33 | + 'AMD', |
| 34 | + # ARM CPUs |
| 35 | + 'ARM', 'AppleArm', 'M1', 'M2', 'M3', |
| 36 | + 'Graviton', 'GRAVITON2', 'GRAVITON3', 'GRAVITON4', |
| 37 | + 'Cortex', 'NvidiaArm', 'GRACE', |
| 38 | + # Other legacy CPUs |
| 39 | + 'POWER8', 'POWER9', |
| 40 | + # Generic GPUs |
| 41 | + 'AMDGPUX', 'NVIDIAX', 'INTELGPUX', |
| 42 | + # Nvidia GPUs |
| 43 | + 'VOLTA', 'AMPERE', 'HOPPER', 'BLACKWELL', |
| 44 | + # Intel GPUs |
| 45 | + 'PVC', 'INTELGPUMAX', 'MAX1100', 'MAX1550' |
| 46 | +] |
43 | 47 |
|
44 | 48 |
|
45 | 49 | @memoized_func |
@@ -553,6 +557,30 @@ def get_cuda_path(): |
553 | 557 | return None |
554 | 558 |
|
555 | 559 |
|
@memoized_func
def get_cuda_version():
    """
    Detect the version of the CUDA toolkit by running ``nvcc --version``.

    The ``nvcc`` binary located under the directory returned by
    `get_cuda_path` is used when that path is known; otherwise ``nvcc`` is
    looked up through the ``PATH``.

    Returns
    -------
    packaging.version.Version or None
        The version reported by ``nvcc``, or None if ``nvcc`` cannot be
        executed, exits with a non-zero status, or prints something that
        cannot be interpreted as a version.
    """
    cuda_home = get_cuda_path()
    nvcc = 'nvcc' if cuda_home is None else f'{cuda_home}/bin/nvcc'

    try:
        out = run([nvcc, '--version'], capture_output=True, text=True)
    except OSError:
        # `nvcc` is missing or not executable. Note that `run` is invoked
        # without `check=True`, so a failing exit status is reported via
        # `returncode` below rather than through an exception
        return None

    if out.returncode != 0:
        return None

    # The relevant line looks like:
    #   "Cuda compilation tools, release 12.2, V12.2.140"
    # and the full version is the token following the comma after "release"
    match = re.search(r'release[^,\n]*,\s*(\S+)', out.stdout)
    if match is None:
        return None

    with suppress(InvalidVersion):
        return parse(match.group(1))

    return None
| 582 | + |
| 583 | + |
556 | 584 | @memoized_func |
557 | 585 | def get_advisor_path(): |
558 | 586 | """ |
@@ -619,28 +647,35 @@ def get_m1_llvm_path(language): |
619 | 647 |
|
@memoized_func
def check_cuda_runtime():
    """
    Check that the NVidia driver is recent enough for the CUDA runtime.

    The CUDA runtime library is located and loaded, then queried for both
    the driver and the runtime versions. If the library cannot be found or
    loaded, or if either query fails, a warning is emitted and no check is
    performed.

    Raises
    ------
    RuntimeError
        If the major version of the driver is lower than the major version
        of the runtime.
    """
    # `import ctypes` alone does not load the `ctypes.util` submodule, so
    # import the helper explicitly rather than relying on another module
    # having imported it already
    from ctypes.util import find_library

    cuda = None
    libname = find_library("cudart")
    if libname:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            # The library was located but could not be loaded
            cuda = None

    if cuda is None:
        warning("Unable to check compatibility of NVidia driver and runtime")
        return

    driver_version = ctypes.c_int()
    runtime_version = ctypes.c_int()

    if cuda.cudaDriverGetVersion(ctypes.byref(driver_version)) != 0 or \
       cuda.cudaRuntimeGetVersion(ctypes.byref(runtime_version)) != 0:
        warning("Unable to check compatibility of NVidia driver and runtime")
        return

    driver_version = driver_version.value
    runtime_version = runtime_version.value

    # CUDA encodes versions as `1000*major + 10*minor`; decode them with
    # integer arithmetic into comparable `(major, minor)` tuples
    driver_v = (driver_version // 1000, (driver_version % 1000) // 10)
    runtime_v = (runtime_version // 1000, (runtime_version % 1000) // 10)

    # First check the "major" version, known to be incompatible
    if driver_v[0] < runtime_v[0]:
        raise RuntimeError(
            f'The NVidia driver (v{driver_version}) on this system is '
            f'not compatible with the CUDA runtime (v{runtime_version})'
        )
    # Next check the version including minor revisions which may still
    # be compatible
    elif driver_v < runtime_v:
        warning(
            f'The NVidia driver (v{driver_version}) on this system may '
            f'not be compatible with the CUDA runtime (v{runtime_version})'
        )
646 | 681 |
|
@@ -1069,6 +1104,32 @@ def march(self): |
1069 | 1104 | return 'tesla' |
1070 | 1105 | return None |
1071 | 1106 |
|
@cached_property
def max_shm_per_block(self):
    """
    Get the maximum amount of shared memory per thread block, in bytes.

    The value is queried from the CUDA runtime for the current device. A
    default of 64 KB is returned if the CUDA runtime library cannot be
    found or loaded, or if any of the runtime queries fails.
    """
    # `import ctypes` alone does not load the `ctypes.util` submodule, so
    # import the helper explicitly
    from ctypes.util import find_library

    default = 64 * 1024  # 64 KB default

    # Load libcudart
    libname = find_library("cudart")
    if not libname:
        return default
    try:
        lib = ctypes.CDLL(libname)
    except OSError:
        # The library was located but could not be loaded
        return default

    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97

    # Get current device; a non-zero status means the query failed and
    # `dev` does not hold a meaningful device id
    dev = ctypes.c_int()
    if lib.cudaGetDevice(ctypes.byref(dev)) != 0:
        return default

    # Query attribute; on failure `value` would be left at 0, which must
    # not be reported as the available shared memory
    value = ctypes.c_int()
    status = lib.cudaDeviceGetAttribute(
        ctypes.byref(value),
        ctypes.c_int(cudaDevAttrMaxSharedMemoryPerBlockOptin),
        dev
    )
    if status != 0:
        return default

    return value.value
| 1132 | + |
1072 | 1133 | def supports(self, query, language=None): |
1073 | 1134 | if language != 'cuda': |
1074 | 1135 | return False |
@@ -1125,6 +1186,8 @@ class AmdDevice(Device): |
1125 | 1186 |
|
1126 | 1187 | max_mem_trans_nbytes = 256 |
1127 | 1188 |
|
| 1189 | + max_shm_per_block = 64*1024 # 64 KB |
| 1190 | + |
1128 | 1191 | @cached_property |
1129 | 1192 | def march(cls): |
1130 | 1193 | # TODO: this corresponds to Vega, which acts as the fallback `march` |
|
0 commit comments