Skip to content

Commit caaac39

Browse files
authored
Merge pull request #2531 from devitocodes/arm_cortex
compiler: Support compiler flags for Cortex
2 parents 2e6c705 + 990c187 commit caaac39

2 files changed

Lines changed: 67 additions & 53 deletions

File tree

devito/arch/archinfo.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@
2424
# Intel CPUs
2525
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
2626
'SKX', 'KLX', 'CLX', 'CLK', 'SPR',
27+
# AMD CPUs
28+
'AMD',
2729
# ARM CPUs
28-
'AMD', 'ARM', 'AppleArm', 'M1', 'M2', 'M3',
30+
'ARM', 'AppleArm', 'M1', 'M2', 'M3',
2931
'Graviton', 'GRAVITON2', 'GRAVITON3', 'GRAVITON4',
32+
'Cortex',
3033
# Other legacy CPUs
3134
'POWER8', 'POWER9',
3235
# Generic GPUs
@@ -226,7 +229,7 @@ def homogenise_gpus(gpu_infos):
226229
for i in ['total', 'free', 'used']:
227230
def make_cbk(i):
228231
def cbk(deviceid=0):
229-
info_cmd = ['nvidia-smi', '--query-gpu=memory.%s' % i, '--format=csv']
232+
info_cmd = ['nvidia-smi', f'--query-gpu=memory.{i}', '--format=csv']
230233
proc = Popen(info_cmd, stdout=PIPE, stderr=DEVNULL)
231234
raw_info = str(proc.stdout.read())
232235

@@ -248,7 +251,7 @@ def cbk(deviceid=0):
248251

249252
return cbk
250253

251-
gpu_info['mem.%s' % i] = make_cbk(i)
254+
gpu_info[f'mem.{i}'] = make_cbk(i)
252255

253256
return gpu_info
254257

@@ -303,10 +306,10 @@ def make_cbk(i):
303306
def cbk(deviceid=0):
304307
try:
305308
# Should only contain Used and total
306-
assert len(info['card%s' % deviceid]) == 2
307-
used = [int(v) for k, v in info['card%s' % deviceid].items()
309+
assert len(info[f'card{deviceid}']) == 2
310+
used = [int(v) for k, v in info[f'card{deviceid}'].items()
308311
if 'Used' in k][0]
309-
total = [int(v) for k, v in info['card%s' % deviceid].items()
312+
total = [int(v) for k, v in info[f'card{deviceid}'].items()
310313
if 'Used' not in k][0]
311314
free = total - used
312315
return {'total': total, 'free': free, 'used': used}[i]
@@ -318,7 +321,7 @@ def cbk(deviceid=0):
318321

319322
return cbk
320323

321-
gpu_info['mem.%s' % i] = make_cbk(i)
324+
gpu_info[f'mem.{i}'] = make_cbk(i)
322325

323326
gpu_infos['architecture'] = 'AMD'
324327
return gpu_info
@@ -737,7 +740,7 @@ def numa_domains(self):
737740
try:
738741
return int(lscpu()['NUMA node(s)'])
739742
except (ValueError, TypeError, KeyError):
740-
warning("NUMA domain count autodetection failed")
743+
warning("NUMA domain count autodetection failed, assuming 1")
741744
return 1
742745

743746
@cached_property
@@ -793,6 +796,21 @@ def march(self):
793796
return 'neoverse-n1'
794797

795798

799+
class Cortex(Arm):

    """
    ARM Cortex-A platforms.

    The core number is read from the platform name: ``'cortexa76'`` maps to
    version 76. A name without a numeric suffix, such as the generic
    ``'cortex'`` platform, has no version.
    """

    @property
    def version(self):
        """
        The Cortex-A core number encoded in the platform name.

        Returns
        -------
        int or None
            E.g. 76 for ``'cortexa76'``; None if the name carries no
            ``cortexa<N>`` suffix (e.g. the generic ``'cortex'`` platform).
        """
        _, marker, suffix = self.name.partition('cortexa')
        # A bare `int(...)` here would raise ValueError for the generic
        # 'cortex' name, which in turn would break `mtune`
        if marker and suffix.isdigit():
            return int(suffix)
        return None

    @cached_property
    def march(self):
        """The value passed to the compiler's ``-march`` flag."""
        return 'armv8-a+crc+simd'

    @cached_property
    def mtune(self):
        """
        The value passed to the compiler's ``-mtune`` flag.

        ``cortex-a<N>`` when the core number is known, ``generic`` otherwise.
        """
        version = self.version
        if version is None:
            # Unknown core: don't emit a malformed `cortex-a<...>` target
            return 'generic'
        return f'cortex-a{version}'
812+
813+
796814
class Amd(Cpu64):
797815

798816
known_isas = ('cpp', 'sse', 'avx', 'avx2')
@@ -1007,6 +1025,8 @@ def march(cls):
10071025
M1 = AppleArm('m1')
10081026
M2 = AppleArm('m2')
10091027
M3 = AppleArm('m3')
1028+
CORTEX = Cortex('cortex')
1029+
CORTEXA76 = Cortex('cortexa76')
10101030

10111031
AMD = Amd('amd')
10121032

devito/arch/compiler.py

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
call_capture_output as _call_capture_output)
1515

1616
from devito.arch import (AMDGPUX, Cpu64, AppleArm, NvidiaDevice, POWER8, POWER9,
17-
Graviton, IntelDevice, get_nvidia_cc, check_cuda_runtime,
18-
get_m1_llvm_path)
17+
Graviton, Cortex, IntelDevice, get_nvidia_cc,
18+
check_cuda_runtime, get_m1_llvm_path)
1919
from devito.exceptions import CompilationError
2020
from devito.logger import debug, warning
2121
from devito.parameters import configuration
@@ -45,7 +45,7 @@ def sniff_compiler_version(cc, allow_fail=False):
4545
if allow_fail:
4646
return Version("0")
4747
else:
48-
raise RuntimeError("The `%s` compiler isn't available on this system" % cc)
48+
raise RuntimeError(f"The `{cc}` compiler isn't available on this system")
4949

5050
ver = ver.strip()
5151
if ver.startswith("gcc"):
@@ -190,8 +190,7 @@ def __init__(self, **kwargs):
190190
self.suffix = kwargs.get('suffix')
191191
if not kwargs.get('mpi'):
192192
self.cc = self.CC if self._cpp is False else self.CXX
193-
self.cc = self.cc if self.suffix is None else ('%s-%s' %
194-
(self.cc, self.suffix))
193+
self.cc = self.cc if self.suffix is None else f'{self.cc}-{self.suffix}'
195194
else:
196195
self.cc = self.MPICC if self._cpp is False else self.MPICXX
197196
self.ld = self.cc # Wanted by the superclass
@@ -214,7 +213,7 @@ def __init__(self, **kwargs):
214213
elif platform.system() == "Windows":
215214
self.so_ext = '.dll'
216215
else:
217-
raise NotImplementedError("Unsupported platform %s" % platform)
216+
raise NotImplementedError(f"Unsupported platform {platform}")
218217

219218
self.__init_finalize__(**kwargs)
220219

@@ -291,20 +290,19 @@ def save(self, soname, binary):
291290
"""
292291
sofile = self.get_jit_dir().joinpath(soname).with_suffix(self.so_ext)
293292
if sofile.is_file():
294-
debug("%s: `%s` was not saved in `%s` as it already exists"
295-
% (self, sofile.name, self.get_jit_dir()))
293+
debug(f"{self}: `{sofile.name}` was not saved in `{self.get_jit_dir()}`"
294+
" as it already exists")
296295
else:
297296
makedirs(self.get_jit_dir(), exist_ok=True)
298297
with open(str(sofile), 'wb') as f:
299298
f.write(binary)
300-
debug("%s: `%s` successfully saved in `%s`"
301-
% (self, sofile.name, self.get_jit_dir()))
299+
debug(f"{self}: `{sofile.name}` successfully saved in `{self.get_jit_dir()}`")
302300

303301
def make(self, loc, args):
304302
"""Invoke the ``make`` command from within ``loc`` with arguments ``args``."""
305303
hash_key = sha1((loc + str(args)).encode()).hexdigest()
306-
logfile = path.join(self.get_jit_dir(), "%s.log" % hash_key)
307-
errfile = path.join(self.get_jit_dir(), "%s.err" % hash_key)
304+
logfile = path.join(self.get_jit_dir(), f"{hash_key}.log")
305+
errfile = path.join(self.get_jit_dir(), f"{hash_key}.err")
308306

309307
with change_directory(loc):
310308
with open(logfile, "w") as lf:
@@ -317,12 +315,12 @@ def make(self, loc, args):
317315
try:
318316
check_call(command, stderr=ef, stdout=lf)
319317
except CalledProcessError as e:
320-
raise CompilationError('Command "%s" return error status %d. '
321-
'Unable to compile code.\n'
322-
'Compile log in %s\n'
323-
'Compile errors in %s\n' %
324-
(e.cmd, e.returncode, logfile, errfile))
325-
debug("Make <%s>" % " ".join(args))
318+
raise CompilationError(f'Command "{e.cmd}" return error status'
319+
f'{e.returncode}. '
320+
f'Unable to compile code.\n'
321+
f'Compile log in {logfile}\n'
322+
f'Compile errors in {errfile}\n')
323+
debug(f"Make <{' '.join(args)}>")
326324

327325
def jit_compile(self, soname, code):
328326
"""
@@ -340,7 +338,7 @@ def jit_compile(self, soname, code):
340338
The source code to be JIT compiled.
341339
"""
342340
target = str(self.get_jit_dir().joinpath(soname))
343-
src_file = "%s.%s" % (target, self.src_ext)
341+
src_file = f"{target}.{self.src_ext}"
344342

345343
cache_dir = self.get_codepy_dir().joinpath(soname[:7])
346344
if configuration['jit-backdoor'] is False:
@@ -353,15 +351,15 @@ def jit_compile(self, soname, code):
353351
try:
354352
with open(src_file, 'r') as f:
355353
code = f.read()
356-
code = ''.join([code, '/* Backdoor edit at %s*/ \n' % time.ctime()])
354+
code = f'{code}/* Backdoor edit at {time.ctime()}*/ \n'
357355
# Bypass the devito JIT cache
358356
# Note: can't simply use Python's `mkdtemp()` as, with MPI, different
359357
# ranks would end up creating different cache dirs
360358
cache_dir = cache_dir.joinpath('jit-backdoor')
361359
cache_dir.mkdir(parents=True, exist_ok=True)
362360
except FileNotFoundError:
363-
raise ValueError("Trying to use the JIT backdoor for `%s`, but "
364-
"the file isn't present" % src_file)
361+
raise ValueError(f"Trying to use the JIT backdoor for `{src_file}`, but "
362+
"the file isn't present")
365363

366364
# Should the compilation command be emitted?
367365
debug = configuration['log-level'] == 'DEBUG'
@@ -392,7 +390,7 @@ def __str__(self):
392390
return self.__class__.__name__
393391

394392
def __repr__(self):
395-
return "JITCompiler[%s]" % self.__class__.__name__
393+
return f"JITCompiler[{self.__class__.__name__}]"
396394

397395
def __getstate__(self):
398396
# The superclass would otherwise only return a subset of attributes
@@ -406,7 +404,7 @@ def add_library_dirs(self, dirs, rpath=False):
406404
if rpath:
407405
# Add rpath flag to embed library dir
408406
for d in as_list(dirs):
409-
self.ldflags.append('-Wl,-rpath,%s' % d)
407+
self.ldflags.append(f'-Wl,-rpath,{d}')
410408

411409
def add_libraries(self, libs):
412410
self.libraries = filter_ordered(self.libraries + as_list(libs))
@@ -442,7 +440,10 @@ def __init_finalize__(self, **kwargs):
442440
# -march isn't supported on power architectures, is -mtune needed?
443441
self.cflags = ['-mcpu=native'] + self.cflags
444442
elif isinstance(platform, Graviton):
445-
self.cflags = ['-mcpu=%s' % platform.march] + self.cflags
443+
self.cflags = [f'-mcpu={platform.march}'] + self.cflags
444+
elif isinstance(platform, Cortex):
445+
self.cflags += [f'-march={platform.march}']
446+
self.cflags += [f'-mtune={platform.mtune}']
446447
else:
447448
self.cflags = ['-march=native'] + self.cflags
448449

@@ -465,14 +466,7 @@ def __lookup_cmds__(self):
465466

466467

467468
class ArmCompiler(GNUCompiler):
468-
469-
def __init_finalize__(self, **kwargs):
470-
GNUCompiler.__init_finalize__(self, **kwargs)
471-
platform = kwargs.pop('platform', configuration['platform'])
472-
473-
# Graviton flag
474-
if isinstance(platform, Graviton):
475-
self.cflags += ['-mcpu=%s' % platform.march]
469+
pass
476470

477471

478472
class ClangCompiler(Compiler):
@@ -493,7 +487,7 @@ def __init_finalize__(self, **kwargs):
493487
if language in ['C', 'openmp']:
494488
cc = get_nvidia_cc()
495489
if cc:
496-
self.cflags += ['-Xopenmp-target', '-march=sm_%s' % cc]
490+
self.cflags += ['-Xopenmp-target', f'-march=sm_{cc}']
497491
self.ldflags += ['-fopenmp', '-fopenmp-targets=nvptx64-nvidia-cuda']
498492
elif platform is AMDGPUX:
499493
self.cflags.remove('-std=c99')
@@ -503,7 +497,7 @@ def __init_finalize__(self, **kwargs):
503497
self.ldflags += ['-fopenmp',
504498
'-fopenmp-targets=amdgcn-amd-amdhsa',
505499
'-Xopenmp-target=amdgcn-amd-amdhsa']
506-
self.ldflags += ['-march=%s' % platform.march]
500+
self.ldflags += [f'-march={platform.march}']
507501
elif isinstance(platform, AppleArm):
508502
# NOTE:
509503
# Apple Mx supports OpenMP through Apple's LLVM compiler.
@@ -512,9 +506,9 @@ def __init_finalize__(self, **kwargs):
512506
llvmm1 = get_m1_llvm_path(language)
513507
if llvmm1 and language == 'openmp':
514508
mx = platform.march
515-
self.ldflags += ['-mcpu=apple-%s' % mx,
516-
'-fopenmp', '-L%s' % llvmm1['libs']]
517-
self.cflags += ['-Xclang', '-I%s' % llvmm1['include']]
509+
self.ldflags += [f'-mcpu=apple-{mx}',
510+
'-fopenmp', f'-L{llvmm1["libs"]}']
511+
self.cflags += ['-Xclang', f'-I{llvmm1["include"]}']
518512
else:
519513
if platform in [POWER8, POWER9]:
520514
# -march isn't supported on power architectures
@@ -563,7 +557,7 @@ def __init_finalize__(self, **kwargs):
563557
if language in ['C', 'openmp']:
564558
self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
565559
self.ldflags += ['-fopenmp']
566-
self.ldflags += ['--offload-arch=%s' % platform.march]
560+
self.ldflags += [f'--offload-arch={platform.march}']
567561
elif platform in [POWER8, POWER9]:
568562
# It doesn't make much sense to use AOMP on Power, but it should work
569563
self.cflags.append('-mcpu=native')
@@ -776,15 +770,15 @@ def __init_intel_mpi__(self, **kwargs):
776770
# whatever the MPI distro is
777771
mpi_distro = sniff_mpi_distro('mpiexec')
778772
if mpi_distro != 'IntelMPI':
779-
warning("Expected Intel MPI distribution with `%s`, but found `%s`"
780-
% (self.__class__.__name__, mpi_distro))
773+
warning(f"Expected Intel MPI distribution with `{self.__class__.__name__}`,"
774+
f"but found `{mpi_distro}`")
781775

782776
def __init_intel_mpi_flags__(self, **kwargs):
783-
self.cflags.insert(0, '-cc=%s' % self.CC)
777+
self.cflags.insert(0, f'-cc={self.CC}')
784778

785779
def get_version(self):
786780
if configuration['mpi']:
787-
cmd = (self.cc, "-cc=%s" % self.CC, "--version")
781+
cmd = (self.cc, f"-cc={self.CC}", "--version")
788782
else:
789783
cmd = (self.cc, "--version")
790784
result, stdout, stderr = call_capture_output(cmd)
@@ -803,7 +797,7 @@ def __lookup_cmds__(self):
803797
# we try to use `mpiicc` first, while `mpicc` is our fallback, which may
804798
# or may not be an Intel distribution
805799
try:
806-
check_output(["mpiicc", "-cc=%s" % self.CC, "--version"]).decode("utf-8")
800+
check_output(["mpiicc", f"-cc={self.CC}", "--version"]).decode("utf-8")
807801
self.MPICC = 'mpiicc'
808802
self.MPICXX = 'mpicxx'
809803
except FileNotFoundError:
@@ -905,7 +899,7 @@ def __init_finalize__(self, **kwargs):
905899
elif isinstance(platform, IntelDevice):
906900
self.cflags.append('-fsycl-targets=spir64')
907901
else:
908-
raise NotImplementedError("Unsupported platform %s" % platform)
902+
raise NotImplementedError(f"Unsupported platform {platform}")
909903

910904

911905
class CustomCompiler(Compiler):

0 commit comments

Comments
 (0)