compiler: Make Operator.estimate_memory return a parseable MemoryEstimate mapping instead of logging a formatted report

EdCaunt · EdCaunt · commit 0d57ce8ef67f · 2025-07-18T09:08:48.000+01:00
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -32,7 +32,8 @@
 from devito.symbolics import estimate_cost, subs_op_args
 from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
                           flatten, filter_sorted, frozendict, is_integer,
-                          split, timed_pass, timed_region, contains_val, humanbytes)
+                          split, timed_pass, timed_region, contains_val,
+                          MemoryEstimate)
 from devito.types import (Buffer, Evaluable, host_layer, device_layer,
                           disk_layer)
 from devito.types.dimension import Thickness
@@ -870,49 +871,54 @@ def cinterface(self, force=False):
     def __call__(self, **kwargs):
         return self.apply(**kwargs)
 
-    def estimate_memory(self, human_readable=True, **kwargs):
+    def estimate_memory(self, **kwargs):
         """
-        Estimate the memory consumed by the Operator.
+        Estimate the memory consumed by the Operator without touching or allocating any
+        data. This interface is designed to mimic `Operator.apply(**kwargs)` and can be
+        called with the kwargs for a prospective operator execution. With no arguments,
+        it will simply estimate memory for the default operator parameters. However, if
+        desired, overrides can be supplied (as per `apply`) and these will be used for
+        the memory estimate.
+
+        If estimating memory for an Operator which is expected to allocate large arrays,
+        it is strongly recommended that one avoids touching the data in Python (thus
+        avoiding allocation). `AbstractFunction` types have their data allocated lazily -
+        the underlying array is only created at the point at which the `data`,
+        `data_with_halo`, etc, attributes are first accessed. Thus by avoiding accessing
+        such attributes in the memory estimation script, one can check the nominal memory
+        usage of proposed operators far larger than will fit in system DRAM.
+
+        Note that this estimate will build the Operator in order to factor in memory
+        allocation for array temporaries and buffers generated during compilation.
 
-        TODO: Finish this docstring
+        Parameters
+        ----------
+        **kwargs: dict
+            As per `Operator.apply()`.
+
+        Returns
+        -------
+        summary: MemoryEstimate
+            An estimate of memory consumed in each of the specified locations, as
+            raw byte counts. Human-readable values are available through the
+            `human_readable` attribute of the returned object.
         """
         # Build the arguments list for which to get the memory consumption
         # This is so that the estimate will factor in overrides
         args = self._prepare_arguments(estimate_memory=True, **kwargs)
         mem = args.nbytes_consumed
 
-        # Extra information for enhanced operators
-        extras = self._enrich_memreport(args, human_readable=human_readable)
-
-        if human_readable:
-            headline = f"Memory consumption for operator `{self.name}`:"
-            w = len(headline)
-            # Columns are width 10
-            fhost = str(humanbytes(mem[host_layer])).center(10)
-            fdevice = str(humanbytes(mem[device_layer])).center(10)
-
-            memreport = (
-                "\n"
-                f"{headline}\n"
-                f"{'┌──────────┬──────────┐'.center(w)}\n"
-                f"{'│   Host   │  Device  │'.center(w)}\n"
-                f"{'├──────────┼──────────┤'.center(w)}\n"
-                f"{f'│{fhost}│{fdevice}│'.center(w)}\n"
-                f"{'└──────────┴──────────┘'.center(w)}\n"
-            )
+        memreport = {'host': mem[host_layer], 'device': mem[device_layer]}
 
-            # TODO: add hinting if the specified operator won't fit
-        else:
-            memreport = f"{self.name} {mem[host_layer]} {mem[device_layer]}"
+        # Extra information for enriched operators
+        extras = self._enrich_memreport(args)
+        memreport.update(extras)
 
-        if extras is not None:
-            memreport += extras
+        return MemoryEstimate(memreport, name=self.name)
 
-        info(memreport)
-
-    def _enrich_memreport(self, args, human_readable=True):
-        # Hook for enriching memory report
-        pass
+    def _enrich_memreport(self, args):
+        # Hook for enriching memory report with additional metadata
+        return {}
 
     def apply(self, **kwargs):
         """
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
@@ -1,17 +1,17 @@
 from collections import OrderedDict, deque
 from collections.abc import Callable, Iterable, MutableSet, Mapping, Set
-from functools import reduce
+from functools import reduce, cached_property
 
 import numpy as np
 from multidict import MultiDict
 
 from devito.tools import Pickable
-from devito.tools.utils import as_tuple, filter_ordered
+from devito.tools.utils import as_tuple, filter_ordered, humanbytes
 from devito.tools.algorithms import toposort
 
 __all__ = ['Bunch', 'EnrichedTuple', 'ReducerMap', 'DefaultOrderedDict',
            'OrderedSet', 'Ordering', 'DAG', 'frozendict',
-           'UnboundTuple', 'UnboundedMultiTuple']
+           'UnboundTuple', 'UnboundedMultiTuple', 'MemoryEstimate']
 
 
 class Bunch:
@@ -660,6 +660,31 @@ def __hash__(self):
         return self._hash
 
 
+class MemoryEstimate(frozendict):
+    """
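+    An immutable mapping from memory location (e.g. 'host', 'device') to the
+    estimated bytes consumed there. The optional `name` keyword labels the
+    estimate (default 'memory_estimate'); the `human_readable` property gives
+    the same entries formatted with `humanbytes`.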
+    """
+
+    def __init__(self, *args, **kwargs):
+        self._name = kwargs.pop('name', 'memory_estimate')
+        super().__init__(*args, **kwargs)
+
+    @property
+    def name(self):
+        return self._name
+
+    @cached_property
+    def human_readable(self):
+        """The memory estimate in human-readable format"""
+        return frozendict({k: humanbytes(v) for k, v in self.items()})
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}({self.name}): {self.human_readable._dict}'
+
+
 class UnboundTuple(tuple):
     """
     An UnboundedTuple is a tuple that can be
diff --git a/devito/types/dense.py b/devito/types/dense.py
@@ -810,7 +810,7 @@ def _arg_defaults(self, alias=None, metadata=None, estimate_memory=False):
             To bind the argument values to different names.
         """
         key = alias or self
-        # TODO: Tidy this up. The idea is to avoid touching the data
+        # Avoid touching the data if just estimating memory usage
         if estimate_memory:
             args = ReducerMap({key.name: self})
         else:
diff --git a/tests/test_operator.py b/tests/test_operator.py
@@ -2068,16 +2068,9 @@ class TestEstimateMemory:
 
     _array_temp = "r0L0(x, y)" if "CXX" in configuration['language'] else "r0[x][y]"
 
-    def parse_output(self, output, expected):
+    def parse_output(self, summary, expected):
         """Parse estimate_memory machine-readable output"""
-        # Check that no allocation occurs as estimate_memory should avoid data touch
-        assert "Allocating" not in output.text
-
-        parsed = output.records[-1].message.split()
-        name, host, device = parsed[:3]
-        extracted = (name, int(host), int(device))
-
-        assert extracted == expected
+        assert (summary['host'], summary['device']) == expected
 
     @pytest.mark.parametrize('shape', [(11,), (101, 101), (101, 101, 101)])
     @pytest.mark.parametrize('dtype', [np.int8, np.int16, np.float32,
@@ -2089,13 +2082,14 @@ def test_basic_usage(self, caplog, shape, dtype, so):
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator(Eq(f, 1))
 
-            # Machine-readable output for parsing
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            # Check that no allocation occurs as estimate_memory should avoid data touch
+            assert "Allocating" not in caplog.text
 
             # Check output of estimate_memory
             host = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
-            expected = ("Kernel", host, 0)
-            self.parse_output(caplog, expected)
+            expected = (host, 0)
+            self.parse_output(summary, expected)
 
     def test_multiple_objects(self, caplog):
         grid = Grid(shape=(101, 101))
@@ -2104,12 +2098,13 @@ def test_multiple_objects(self, caplog):
         g = Function(name='g', grid=grid, space_order=4, dtype=np.float64)
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator([Eq(f, 1), Eq(g, 1)])
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
                         for func in (f, g))
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     @pytest.mark.parametrize('time', [True, False])
     def test_sparse(self, caplog, time):
@@ -2123,12 +2118,13 @@ def test_sparse(self, caplog, time):
 
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator(src_term)
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
                         for func in (f, src, src.coordinates))
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     @pytest.mark.parametrize('save', [None, Buffer(3), 10])
     def test_timefunction(self, caplog, save):
@@ -2137,10 +2133,11 @@ def test_timefunction(self, caplog, save):
 
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator(Eq(f, 1))
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
             check = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     def test_mashup(self, caplog):
         grid = Grid(shape=(101, 101))
@@ -2158,13 +2155,14 @@ def test_mashup(self, caplog):
 
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator([eq0, eq1] + src_term0 + src_term1)
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
                         for func in (f, g, src0, src0.coordinates,
                                      src1, src1.coordinates))
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     def test_temp_array(self, caplog):
         """Check that temporary arrays will be factored into the memory calculation"""
@@ -2187,18 +2185,20 @@ def test_temp_array(self, caplog):
             # Ensure an array temporary is created
             assert self._array_temp in str(op.ccode)
 
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
                         for func in (f, g, a))
 
             # Factor in the temp array
             check += reduce(mul, b.shape_allocated)*np.dtype(a.dtype).itemsize
 
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     def test_overrides(self, caplog):
+        # TODO: Consolidate this boilerplate
         grid0 = Grid(shape=(101, 101))
         # Original fields
         f0 = Function(name='f0', grid=grid0, space_order=4)
@@ -2213,6 +2213,13 @@ def test_overrides(self, caplog):
         s1 = SparseFunction(name='s1', grid=grid1, npoint=200)
         st1 = SparseTimeFunction(name='st1', grid=grid1, npoint=200, nt=20)
 
+        grid2 = Grid(shape=(51, 51))  # Smaller grid so overrides are distinct
+        # Alternative replacement fields
+        f2 = Function(name='f2', grid=grid2, space_order=4)
+        tf2 = TimeFunction(name='tf2', grid=grid2, space_order=4)
+        s2 = SparseFunction(name='s2', grid=grid2, npoint=50)
+        st2 = SparseTimeFunction(name='st2', grid=grid2, npoint=50, nt=5)
+
         eq0 = Eq(f0, 1)
         eq1 = Eq(tf0, 1)
         s0_term = s0.inject(field=f0, expr=s0)
@@ -2222,13 +2229,61 @@ def test_overrides(self, caplog):
             op = Operator([eq0, eq1] + s0_term + st0_term)
 
             # Apply overrides for the check
-            op.estimate_memory(f0=f1, tf0=tf1, s0=s1, st0=st1, human_readable=False)
+            summary0 = op.estimate_memory(f0=f1, tf0=tf1, s0=s1, st0=st1)
+
+            check0 = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
+                         for func in (f1, tf1, s1, s1.coordinates, st1, st1.coordinates))
+
+            expected0 = (check0, 0)
+            self.parse_output(summary0, expected0)
+
+            # Check with a second set of overrides
+            summary1 = op.estimate_memory(f0=f2, tf0=tf2, s0=s2, st0=st2)
+            assert "Allocating" not in caplog.text
+
+            check1 = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
+                         for func in (f2, tf2, s2, s2.coordinates, st2, st2.coordinates))
+
+            expected1 = (check1, 0)
+            self.parse_output(summary1, expected1)
+
+    def test_overrides_w_temp_array(self, caplog):
+        """Check that temporary arrays are correctly adjusted for overrides"""
+        grid = Grid(shape=(101, 101))
+        f = TimeFunction(name='f', grid=grid, space_order=2)
+        g = TimeFunction(name='g', grid=grid, space_order=2)
+        a = Function(name='a', grid=grid, space_order=2)
+
+        grid0 = Grid(shape=(51, 51))
+        f0 = TimeFunction(name='f0', grid=grid0, space_order=2)
+        g0 = TimeFunction(name='g0', grid=grid0, space_order=2)
+        a0 = Function(name='a0', grid=grid0, space_order=2)
+
+        # Fake array allocated in Python land so that shape_allocated can be used
+        b = Function(name='b', grid=grid0, space_order=0)
+
+        # Reuse an expensive function to encourage generation of an array temp
+        eq0 = Eq(f.forward, g + sympy.sin(a))
+        eq1 = Eq(g.forward, f + sympy.sin(a))
+
+        with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
+            op = Operator([eq0, eq1])
+
+            # Regression to ensure this test functions as intended
+            # Ensure an array temporary is created
+            assert self._array_temp in str(op.ccode)
+
+            summary = op.estimate_memory(f=f0, g=g0, a=a0)
+            assert "Allocating" not in caplog.text
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                        for func in (f1, tf1, s1, s1.coordinates, st1, st1.coordinates))
+                        for func in (f0, g0, a0))
+
+            # Factor in the temp array
+            check += reduce(mul, b.shape_allocated)*np.dtype(a0.dtype).itemsize
 
-            expected = ("Kernel", check, 0)
-            self.parse_output(caplog, expected)
+            expected = (check, 0)
+            self.parse_output(summary, expected)
 
     def test_device(self, caplog):
         # Note: this uses switchconfig and runs on all backends to reflect expected
@@ -2245,10 +2300,11 @@ def test_device(self, caplog):
         with switchconfig(**config), caplog.at_level(logging.DEBUG):
             op = Operator(Eq(f, 1))
 
-            op.estimate_memory(human_readable=False)
+            summary = op.estimate_memory()
+            assert "Allocating" not in caplog.text
 
             check = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
 
             # Matching memory allocated both on host and device for memmap
-            expected = ("Kernel", check, check)
-            self.parse_output(caplog, expected)
+            expected = (check, check)
+            self.parse_output(summary, expected)