Skip to content

Commit ed47b74

Browse files
committed
compiler: Further misc improvements (minor)
1 parent 969040d commit ed47b74

4 files changed

Lines changed: 13 additions & 3 deletions

File tree

devito/ir/clusters/algorithms.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def callback(self, clusters, prefix, backlog=None, known_break=None):
157157
# parallelism
158158
for i in range(1, len(clusters)):
159159
# FIXME: This eats a lot of time (four seconds each time)
160+
# FIXME: Pull scope out of this
160161
if self._break_for_parallelism(scope, candidates, i):
161162
return self.callback(clusters[:i], prefix, clusters[i:] + backlog,
162163
candidates | known_break)
@@ -194,6 +195,7 @@ def _break_for_parallelism(self, scope, candidates, i):
194195
# break parallelism
195196

196197
# TODO: Can this loop be made to short-circuit?
198+
# TODO: Most of the time is burned in d_from_access_gen
197199
test = False
198200
for d in scope.d_from_access_gen(scope.a_query(i)):
199201
if d.is_local or d.is_storage_related(candidates):

devito/ir/support/basic.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ def lex_le(self, other):
317317
def lex_lt(self, other):
318318
return self.timestamp < other.timestamp
319319

320+
# NOTE: This is called a lot with the same arguments; memoizing it yields a mild speedup
321+
@memoized_meth
320322
def distance(self, other):
321323
"""
322324
Compute the distance from ``self`` to ``other``.
@@ -365,7 +367,9 @@ def distance(self, other):
365367
# constant access at 4
366368
for v in (self[n], other[n]):
367369
try:
368-
if bool(v < sit.symbolic_min or v > sit.symbolic_max):
370+
# NOTE: Split the boolean so that the conditional short-circuits
371+
# more frequently, for a mild speedup
372+
if bool(v < sit.symbolic_min) or bool(v > sit.symbolic_max):
369373
return Vector(S.ImaginaryUnit)
370374
except TypeError:
371375
pass
@@ -1166,6 +1170,7 @@ def d_from_access_gen(self, accesses):
11661170
Generate all flow, anti, and output dependences involving any of
11671171
the given TimedAccess objects.
11681172
"""
1173+
# FIXME: This seems to be a hotspot
11691174
accesses = as_tuple(accesses)
11701175
for d in self.d_all_gen():
11711176
for i in accesses:

devito/passes/clusters/misc.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,7 @@ def is_cross(source, sink):
352352
v = len(cg0.exprs)
353353
return t0 < v <= t1 or t1 < v <= t0
354354

355-
for cg1 in cgroups[n+1:]:
356-
n1 = cgroups.index(cg1)
355+
for n1, cg1 in enumerate(cgroups[n+1:], start=n+1):
357356

358357
# A Scope to compute all cross-ClusterGroup anti-dependences
359358
scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=is_cross)

devito/passes/iet/mpi.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def optimize_halospots(iet, **kwargs):
2424
merged and moved around in order to improve the halo exchange performance.
2525
"""
2626
iet = _drop_reduction_halospots(iet)
27+
# TODO: _hoist_invariant and _merge_halospots are both slow
2728
iet = _hoist_invariant(iet)
2829
iet = _merge_halospots(iet)
2930
iet = _drop_if_unwritten(iet, **kwargs)
@@ -81,6 +82,8 @@ def _hoist_invariant(iet):
8182
cond_mapper = _make_cond_mapper(iet)
8283
iter_mapper = _filter_iter_mapper(iet)
8384

85+
# from IPython import embed; embed()
86+
8487
for it, halo_spots in iter_mapper.items():
8588
for hs0, hs1 in combinations(halo_spots, r=2):
8689
if _check_control_flow(hs0, hs1, cond_mapper):
@@ -157,6 +160,7 @@ def _merge_halospots(iet):
157160

158161
for f, v in hs1.fmapper.items():
159162
for dep in scope.d_flow.project(f):
163+
# NOTE: If this condition is ever true we break, so the for-else clause below is skipped
160164
if not any(rule(dep, hs1, v.loc_indices) for rule in rules):
161165
break
162166
else:

0 commit comments

Comments
 (0)