From 4a1058ada87b636f2a2e32d3d906f5f02c4a7246 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Thu, 30 Apr 2026 12:37:18 +0100
Subject: [PATCH 01/15] added cell stats operator and changed nan behaviour in
 plot_histogram_series

Co-authored-by: Copilot <copilot@github.com>
---
 src/CSET/operators/feature.py | 144 ++++++++++++++++++++++++++++++++++
 src/CSET/operators/plot.py    |   4 +-
 2 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index a3cfe0c72..6f35b9a02 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -184,3 +184,147 @@ def track(
         tracking_cubelist.append(tracking_cube)
 
     return tracking_cubelist
+
+
+def cell_stats(
+    cube: iris.cube.Cube,
+    threshold: float,
+    under_threshold: bool = False,
+    min_size: int = 4,
+    save_data: bool = False,
+):
+    """Identify features in each timestep and output statistics.
+
+    Parameters
+    ----------
+    threshold: float
+        The threshold value for feature detection.
+    under_threshold: bool, optional
+        If set to True, features are identified where the data is below the threshold.
+        If set to False, features are identified where the data is above the threshold.
+        Default is False.
+    min_size: int, optional
+        The minimum number of contiguous grid points required for a feature to be tracked.
+        Default is 4.
+    save_data: bool, optional
+        If set to True, all tracking data is saved to disk for further analysis (including csv
+        and txt files containing feature properties that are not returned in output cubes).
+        Default is False.
+
+    Returns
+    -------
+    cell_stats_cubes: iris.cube.CubeList
+        An iris CubeList containing "feature_size", "feature_mean", and "feature_max" cubes.
+
+    Notes
+    -----
+    This operator uses the Simple-Track package with tracking disabled to identify features
+    in each timestep and compile cell statistics. Outputs cubes containing feature size (number
+    of grid points), mean value within features, and maximum value within features.
+
+    Links
+    ----------
+    .. https://github.com/ParaChute-UK/simple-track
+
+    Examples
+    --------
+    >>> cell_stats_cubes = feature.cell_stats(threshold=2)
+    >>> feature_size_cube = cell_stats_cubes.extract_cube("feature_size")
+    >>> plt.hist(feature_size_cube[-1])
+    >>> plt.show()
+
+    """
+    # Setup config
+    tracker_config = {
+        "FEATURE": {
+            "threshold": threshold,
+            "under_threshold": under_threshold,
+            "min_size": min_size,
+        },
+        "OUTPUT": {
+            "save_data": save_data,
+            "experiment_name": "feature_tracking",
+            "path": f"{os.getcwd()}/cell-stats_data",
+            "skip_tracking": True,
+        },
+    }
+    logging.debug(f"Tracker config: {tracker_config}")
+
+    # Get cube data into a dict to pass to Tracker
+    times = cube.coord("time").points
+    time_units = cube.coord("time").units
+    times_dt = [time_units.num2pydate(t) for t in times]
+    cube_dict = {
+        time: cube_slice.data
+        for time, cube_slice in zip(times_dt, cube.slices_over("time"), strict=True)
+    }
+
+    # Run tracking, returning Timeline object
+    timeline = Tracker(tracker_config).run(cube_dict)
+    logging.debug("Tracking completed")
+
+    # Get feature data from each frame of data, append to list of lists
+    # before conversion to numpy array (which requires knowledge of max
+    # number of features before constructing array shape)
+    size_data, mean_data, max_data = [], [], []
+    number_of_features = []
+    for time in times_dt:
+        frame = timeline.get_frame(time)
+        features = frame.features
+        size_data.append([feature.get_size() for feature in features.values()])
+        mean_data.append([feature.mean for feature in features.values()])
+        max_data.append([feature.max for feature in features.values()])
+        number_of_features.append(len(features))
+
+    # Pad data with NaNs to create arrays of consistent shape (max number of features across
+    # all timesteps)
+    arr_size = max(number_of_features)
+
+    # Size data is integer, but we need to pad with NaNs, so fill with invalid value first
+    size_data = np.array(
+        [
+            np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
+            for sizes in size_data
+        ],
+        dtype=float,
+    )
+    size_data[size_data == -100] = np.nan
+
+    # Mean and max data are already float, so can be padded with NaNs directly.
+    mean_data = np.array(
+        [
+            np.pad(means, (0, arr_size - len(means)), constant_values=np.nan)
+            for means in mean_data
+        ]
+    )
+    max_data = np.array(
+        [
+            np.pad(maxs, (0, arr_size - len(maxs)), constant_values=np.nan)
+            for maxs in max_data
+        ]
+    )
+
+    # Create cubes
+    time_coord = cube.coord("time").copy()
+    feature_coord = iris.coords.DimCoord(
+        np.arange(arr_size),
+        long_name="feature_number",
+        var_name="feature_number",
+        units="1",
+    )
+    coords = [time_coord, feature_coord]
+    coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
+
+    cell_stats_cubelist = iris.cube.CubeList()
+    cube_names = ["feature_size", "feature_mean", "feature_max"]
+    data_arrays = [size_data, mean_data, max_data]
+    for cube_name, data_array in zip(cube_names, data_arrays, strict=True):
+        cell_stats_cube = iris.cube.Cube(
+            data_array,
+            long_name=cube_name,
+            units="1",
+            dim_coords_and_dims=coords_and_dims,
+        )
+        cell_stats_cubelist.append(cell_stats_cube)
+
+    return cell_stats_cubelist
diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py
index 5ac8966a6..a88626181 100644
--- a/src/CSET/operators/plot.py
+++ b/src/CSET/operators/plot.py
@@ -3074,8 +3074,8 @@ def plot_histogram_series(
             break
 
     if levels is None:
-        vmin = min(cb.data.min() for cb in cubes)
-        vmax = max(cb.data.max() for cb in cubes)
+        vmin = min(np.nanmin(cb.data) for cb in cubes)
+        vmax = max(np.nanmax(cb.data) for cb in cubes)
 
     # Make postage stamp plots if stamp_coordinate exists and has more than a
     # single point. If single_plot is True:

From 40f87c21012bcba52d4bd6d7c499cf3c22b4f878 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Thu, 30 Apr 2026 12:39:26 +0100
Subject: [PATCH 02/15] added example cell_stats recipe

---
 .../example_feature_cell_stats.yaml           | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100755 src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml

diff --git a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
new file mode 100755
index 000000000..728c16cd1
--- /dev/null
+++ b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
@@ -0,0 +1,26 @@
+category: Quick Look
+title: Example running cell_stats and plotting histograms
+description: |
+  Uses the feature.cell_stats operator to calculate cell size, mean cell values and max cell values for 
+  identified features. 
+
+steps:
+  - operator: read.read_cubes
+    file_paths: $INPUT_PATHS
+
+  - operator: filters.filter_cubes
+    constraint:
+      operator: constraints.generate_var_constraint
+      varname: precipitation_flux
+
+  - operator: feature.cell_stats
+    threshold: 3
+
+  # Filter tracking cubelist to one of "feature_size", "feature_mean" or "feature_max"
+  - operator: filters.filter_cubes
+    constraint:
+      operator: constraints.generate_var_constraint
+      varname: feature_max
+    
+  - operator: plot.plot_histogram_series
+

From 74cb58e71d8d0fc4e8ba6981fd5dde0f47b3a1ac Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 1 May 2026 14:15:05 +0100
Subject: [PATCH 03/15] log-log plotting for features, and frequency on y-axis
 rather than density

---
 src/CSET/operators/plot.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py
index a88626181..562c2d5b7 100644
--- a/src/CSET/operators/plot.py
+++ b/src/CSET/operators/plot.py
@@ -1368,7 +1368,10 @@ def _plot_and_save_histogram_series(
 
     # Set default that histograms will produce probability density function
     # at each bin (integral over range sums to 1).
-    density = True
+    if "feature" in cubes[0].long_name:
+        density = False
+    else:
+        density = True
 
     for cube in iter_maybe(cubes):
         # Easier to check title (where var name originates)
@@ -1400,6 +1403,10 @@ def _plot_and_save_histogram_series(
             np.max(bins),
         )
 
+        if "feature" in cube.long_name:
+            ax.set_yscale("log")
+            ax.set_xscale("log")
+
         # Reshape cube data into a single array to allow for a single histogram.
         # Otherwise we plot xdim histograms stacked.
         cube_data_1d = (cube.data).flatten()
@@ -1432,6 +1439,8 @@ def _plot_and_save_histogram_series(
         ax.set_ylabel(
             f"Contribution to mean ({iter_maybe(cubes)[0].units})", fontsize=14
         )
+    if "feature" in cubes[0].long_name:
+        ax.set_ylabel("Frequency", fontsize=14)
     ax.set_xlim(vmin, vmax)
     ax.tick_params(axis="both", labelsize=12)
 

From e1070751fc8cce6920a0e604f90bbd89d329f423 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 1 May 2026 14:25:33 +0100
Subject: [PATCH 04/15] used same log bins for "surface_microphysical" as
 "feature" cubes - may need revisiting for mean and max

---
 src/CSET/operators/plot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CSET/operators/plot.py b/src/CSET/operators/plot.py
index 562c2d5b7..9e0cca1ab 100644
--- a/src/CSET/operators/plot.py
+++ b/src/CSET/operators/plot.py
@@ -1377,7 +1377,7 @@ def _plot_and_save_histogram_series(
         # Easier to check title (where var name originates)
         # than seeing if long names exist etc.
         # Exception case, where distribution better fits log scales/bins.
-        if "surface_microphysical" in title:
+        if "surface_microphysical" in title or "feature" in cube.long_name:
             if "amount" in title:
                 # Compute histogram following Klingaman et al. (2017): ASoP
                 bin2 = np.exp(np.log(0.02) + 0.1 * np.linspace(0, 99, 100))

From f75e6d9c357aeceda20ecd6b519f55e142b5b56a Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 1 May 2026 15:51:51 +0100
Subject: [PATCH 05/15] added feature_effective_radius as alternative to
 feature_size

Co-authored-by: Copilot <copilot@github.com>
---
 src/CSET/operators/feature.py                 | 50 ++++++++++++++++---
 .../example_feature_cell_stats.yaml           |  6 ++-
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index 6f35b9a02..6dd32c510 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -17,6 +17,8 @@
 import os
 
 import iris
+import iris.cube
+import iris.util
 import numpy as np
 from simpletrack.track import Tracker
 
@@ -191,6 +193,7 @@ def cell_stats(
     threshold: float,
     under_threshold: bool = False,
     min_size: int = 4,
+    feature_size_unit: str = "feature_effective_radius",
     save_data: bool = False,
 ):
     """Identify features in each timestep and output statistics.
@@ -206,6 +209,11 @@ def cell_stats(
     min_size: int, optional
         The minimum number of contiguous grid points required for a feature to be tracked.
         Default is 4.
+    feature_size_unit: str, optional
+        The unit to define feature size output. Options are "feature_effective_radius"
+        (the radius of a circle with the same area as the feature, in km) or "
+        feature_grid_points" (the number of grid points contained in the feature).
+        Default is "feature_effective_radius".
     save_data: bool, optional
         If set to True, all tracking data is saved to disk for further analysis (including csv
         and txt files containing feature properties that are not returned in output cubes).
@@ -314,15 +322,43 @@ def cell_stats(
     )
     coords = [time_coord, feature_coord]
     coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
-
     cell_stats_cubelist = iris.cube.CubeList()
-    cube_names = ["feature_size", "feature_mean", "feature_max"]
-    data_arrays = [size_data, mean_data, max_data]
-    for cube_name, data_array in zip(cube_names, data_arrays, strict=True):
+
+    # Set cube properties
+    cube_properties = {
+        "feature_size": {
+            "data": size_data,
+            "long_name": feature_size_unit,
+            "units": "1",
+        },
+        "feature_mean": {"data": mean_data, "long_name": "feature_mean", "units": 1},
+        "feature_max": {"data": max_data, "long_name": "feature_max", "units": 1},
+    }
+    if feature_size_unit == "feature_effective_radius":
+        # Conert feature size to effective radius, assuming uniform grid
+        # Guess coord representing horizontal grid (choose first available)
+        hzntl_coord = [
+            coord
+            for coord in cube.coords()
+            if iris.util.guess_coord_axis(coord) in ["X", "Y"]
+        ][0]
+        logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
+        # Convert to km if possible
+        hzntl_coord.convert_units("km")
+        # Naive grid spacing estimate, correct for regular grids, likely wildly
+        # inaccruate for LFRic/irregular grids
+        grid_spacing = np.abs(np.diff(hzntl_coord.points).mean())
+        # Convert feature size in grid points to effective radius in km
+        size_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
+        # Reset cube properties
+        cube_properties["feature_size"]["data"] = size_data
+        cube_properties["feature_size"]["units"] = "km"
+
+    for cb_props in cube_properties.values():
         cell_stats_cube = iris.cube.Cube(
-            data_array,
-            long_name=cube_name,
-            units="1",
+            data=cb_props["data"],
+            long_name=cb_props["long_name"],
+            units=cb_props["units"],
             dim_coords_and_dims=coords_and_dims,
         )
         cell_stats_cubelist.append(cell_stats_cube)
diff --git a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
index 728c16cd1..300417442 100755
--- a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
+++ b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
@@ -14,13 +14,15 @@ steps:
       varname: precipitation_flux
 
   - operator: feature.cell_stats
+    feature_size_unit: "feature_effective_radius"
     threshold: 3
 
-  # Filter tracking cubelist to one of "feature_size", "feature_mean" or "feature_max"
+  # Filter tracking cubelist to one of "feature_mean", "feature_max", or  
+  # "feature_size/feature_effective_radius" (depending on feature_size_unit set above), 
   - operator: filters.filter_cubes
     constraint:
       operator: constraints.generate_var_constraint
-      varname: feature_max
+      varname: feature_effective_radius
     
   - operator: plot.plot_histogram_series
 

From e928ba892cf757b3875b9e5d42c9531a50e1aaac Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 1 May 2026 15:56:26 +0100
Subject: [PATCH 06/15] additional comments

---
 src/CSET/operators/feature.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index 6dd32c510..c58151741 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -288,7 +288,8 @@ def cell_stats(
     # all timesteps)
     arr_size = max(number_of_features)
 
-    # Size data is integer, but we need to pad with NaNs, so fill with invalid value first
+    # Size data is integer, but we need to pad with NaNs (which is a float), so fill
+    # with invalid value first
     size_data = np.array(
         [
             np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
@@ -344,6 +345,7 @@ def cell_stats(
         ][0]
         logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
         # Convert to km if possible
+        # TODO: fall back to feature_size if this conversion fails
         hzntl_coord.convert_units("km")
         # Naive grid spacing estimate, correct for regular grids, likely wildly
         # inaccruate for LFRic/irregular grids
@@ -354,6 +356,7 @@ def cell_stats(
         cube_properties["feature_size"]["data"] = size_data
         cube_properties["feature_size"]["units"] = "km"
 
+    # Populate cubelist
     for cb_props in cube_properties.values():
         cell_stats_cube = iris.cube.Cube(
             data=cb_props["data"],

From 730c7aa1dc1be06b0e2009e74dd097431e898769 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 1 May 2026 18:00:19 +0100
Subject: [PATCH 07/15] added simple-track to dependencies

---
 pyproject.toml               | 1 +
 requirements/environment.yml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index e23672040..b5716c1b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
     "scipy",
     "scikit-image",
     "dask",
+    "simple-track",
 ]
 
 [project.urls]
diff --git a/requirements/environment.yml b/requirements/environment.yml
index af5ba93ac..2fbcaa3cb 100644
--- a/requirements/environment.yml
+++ b/requirements/environment.yml
@@ -16,6 +16,7 @@ dependencies:
   - scipy
   - scikit-image # For image processing techniques.
   - dask
+  - simple-track 
 
   # Build dependencies
   - setuptools>=64

From a3bce7723a9a190c9b88f98a0dfb153612c56d5c Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 8 May 2026 13:25:03 +0100
Subject: [PATCH 08/15] added cube metadata copying to feature cubes, and error
 catching for feature_effective_radius conversion

---
 src/CSET/operators/feature.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index c58151741..a9ebbcec3 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -346,7 +346,12 @@ def cell_stats(
         logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
         # Convert to km if possible
         # TODO: fall back to feature_size if this conversion fails
-        hzntl_coord.convert_units("km")
+        # TODO: this will still add the name "feature_effective_radius" to the feature_size
+        # cube, even though the data is actually pixel size. Figure out elegant solution.
+        try:
+            hzntl_coord.convert_units("km")
+        except:
+            pass
         # Naive grid spacing estimate, correct for regular grids, likely wildly
         # inaccruate for LFRic/irregular grids
         grid_spacing = np.abs(np.diff(hzntl_coord.points).mean())
@@ -356,6 +361,25 @@ def cell_stats(
         cube_properties["feature_size"]["data"] = size_data
         cube_properties["feature_size"]["units"] = "km"
 
+    # Get list of coords to copy from input cube to output cubes
+    copyable_coord_names = [
+        "realization",
+        "hour",
+        "forecast_period",
+        "forecast_reference_time",
+        "model_name",
+    ]
+    input_cube_coord_names = []
+    for coord in cube.coords():
+        input_cube_coord_names.append(coord.standard_name)
+        input_cube_coord_names.append(coord.long_name)
+
+    coords_to_copy = [
+        coord_name
+        for coord_name in copyable_coord_names
+        if coord_name in input_cube_coord_names
+    ]
+
     # Populate cubelist
     for cb_props in cube_properties.values():
         cell_stats_cube = iris.cube.Cube(
@@ -364,6 +388,11 @@ def cell_stats(
             units=cb_props["units"],
             dim_coords_and_dims=coords_and_dims,
         )
+        # Add other metadata from input cube
+        for coord_name in coords_to_copy:
+            cell_stats_cube.add_aux_coord(cube.coord(coord_name).copy())
+
+        # Add to cubelist
         cell_stats_cubelist.append(cell_stats_cube)
 
     return cell_stats_cubelist

From c52c4302851076a8a75d79fe62c44d957d0d7357 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Fri, 8 May 2026 13:31:04 +0100
Subject: [PATCH 09/15] added cset_comparison_base to list of coord metadata to
 copy to feature cube (may not be necessary?)

---
 src/CSET/operators/feature.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index a9ebbcec3..a33a385f4 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -368,6 +368,7 @@ def cell_stats(
         "forecast_period",
         "forecast_reference_time",
         "model_name",
+        "cset_comparison_base",
     ]
     input_cube_coord_names = []
     for coord in cube.coords():

From 54c3db580e803ee70f3d2a7aa86b26bd97e63e1e Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Wed, 13 May 2026 11:06:41 +0100
Subject: [PATCH 10/15] copying coord to cell_stats_cube now copies dimension
 information (assuming same cube structure)

---
 src/CSET/operators/feature.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index a33a385f4..adefa9bf3 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -391,7 +391,13 @@ def cell_stats(
         )
         # Add other metadata from input cube
         for coord_name in coords_to_copy:
-            cell_stats_cube.add_aux_coord(cube.coord(coord_name).copy())
+            coord = cube.coord(coord_name).copy()
+            # Check if this coord represents a dimension of data
+            dims = cube.coord_dims(coord)
+            if len(dims) > 1:
+                cell_stats_cube.add_aux_coord(coord, dims)
+            else:
+                cell_stats_cube.add_aux_coord(coord)
 
         # Add to cubelist
         cell_stats_cubelist.append(cell_stats_cube)

From dda9d48c57f747bbe0e8d445a7186f7ad72b63e0 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Tue, 19 May 2026 15:04:53 +0100
Subject: [PATCH 11/15] removed feature_effective_radius support for now, added
 support for multiple models in cell_stats operator

---
 src/CSET/operators/feature.py                 | 331 +++++++++---------
 .../example_feature_cell_stats.yaml           |  19 +-
 2 files changed, 182 insertions(+), 168 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index adefa9bf3..f9ce014d1 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -22,6 +22,8 @@
 import numpy as np
 from simpletrack.track import Tracker
 
+from CSET._common import iter_maybe
+
 
 def track(
     cube: iris.cube.Cube,
@@ -67,7 +69,7 @@ def track(
     Returns
     -------
     tracking_cubes: iris.cube.CubeList
-        A list of iris cubes containing tracking data, including feauture ID, lifetime,
+        A list of iris cubes containing tracking data, including feature ID, lifetime,
         and locations of initiating features.
 
     Notes
@@ -92,7 +94,7 @@ def track(
         "feature_init":
             A 2D binary field indicating the location of newly initiated features at each timestep.
             These features are identified as having a lifetime of 1 AND have initiated sufficiently
-            far from other, existing features that they are not considered to have spawed from them.
+            far from other, existing features that they are not considered to have spawned from them.
 
     Links
     ----------
@@ -189,7 +191,7 @@ def track(
 
 
 def cell_stats(
-    cube: iris.cube.Cube,
+    cubes: iris.cube.Cube | iris.cube.CubeList,
     threshold: float,
     under_threshold: bool = False,
     min_size: int = 4,
@@ -242,164 +244,179 @@ def cell_stats(
     >>> plt.show()
 
     """
-    # Setup config
-    tracker_config = {
-        "FEATURE": {
-            "threshold": threshold,
-            "under_threshold": under_threshold,
-            "min_size": min_size,
-        },
-        "OUTPUT": {
-            "save_data": save_data,
-            "experiment_name": "feature_tracking",
-            "path": f"{os.getcwd()}/cell-stats_data",
-            "skip_tracking": True,
-        },
-    }
-    logging.debug(f"Tracker config: {tracker_config}")
-
-    # Get cube data into a dict to pass to Tracker
-    times = cube.coord("time").points
-    time_units = cube.coord("time").units
-    times_dt = [time_units.num2pydate(t) for t in times]
-    cube_dict = {
-        time: cube_slice.data
-        for time, cube_slice in zip(times_dt, cube.slices_over("time"), strict=True)
-    }
+    # Check inputs
+    cubes = iter_maybe(cubes)
+    cell_stats_cubelist = iris.cube.CubeList()
 
-    # Run tracking, returning Timeline object
-    timeline = Tracker(tracker_config).run(cube_dict)
-    logging.debug("Tracking completed")
+    # Run tracking on all input data
+    for cube in cubes:
+        model_name = cube.attributes.get("model_name", None)
+        # Setup config
+        tracker_config = {
+            "FEATURE": {
+                "threshold": threshold,
+                "under_threshold": under_threshold,
+                "min_size": min_size,
+            },
+            "OUTPUT": {
+                "save_data": save_data,
+                "experiment_name": "feature_tracking",
+                "path": f"{os.getcwd()}/{model_name}/cell-stats_data",
+                "skip_tracking": True,
+            },
+        }
+        logging.debug(f"Tracker config: {tracker_config}")
+
+        # Get cube data into a dict to pass to Tracker
+        times = cube.coord("time").points
+        time_units = cube.coord("time").units
+        times_dt = [time_units.num2pydate(t) for t in times]
+        cube_dict = {
+            time: cube_slice.data
+            for time, cube_slice in zip(times_dt, cube.slices_over("time"), strict=True)
+        }
+
+        # Run tracking, returning Timeline object
+        timeline = Tracker(tracker_config).run(cube_dict)
+
+        logging.debug(f"Tracking completed for {model_name}")
+
+        # Get feature data from each frame of data, append to list of lists
+        # before conversion to numpy array (which requires knowledge of max
+        # number of features before constructing array shape)
+        size_data, mean_data, max_data = [], [], []
+        number_of_features = []
+        for time in times_dt:
+            frame = timeline.get_frame(time)
+            features = frame.features
+            size_data.append([feature.get_size() for feature in features.values()])
+            mean_data.append([feature.mean for feature in features.values()])
+            max_data.append([feature.max for feature in features.values()])
+            number_of_features.append(len(features))
+
+        # Pad data with NaNs to create arrays of consistent shape (max number of features across
+        # all timesteps)
+        arr_size = max(number_of_features)
+
+        # Size data is integer, but we need to pad with NaNs (which is a float), so fill
+        # with invalid value first
+        size_data = np.array(
+            [
+                np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
+                for sizes in size_data
+            ],
+            dtype=float,
+        )
+        size_data[size_data == -100] = np.nan
+
+        # Mean and max data are already float, so can be padded with NaNs directly.
+        mean_data = np.array(
+            [
+                np.pad(means, (0, arr_size - len(means)), constant_values=np.nan)
+                for means in mean_data
+            ]
+        )
+        max_data = np.array(
+            [
+                np.pad(maxs, (0, arr_size - len(maxs)), constant_values=np.nan)
+                for maxs in max_data
+            ]
+        )
 
-    # Get feature data from each frame of data, append to list of lists
-    # before conversion to numpy array (which requires knowledge of max
-    # number of features before constructing array shape)
-    size_data, mean_data, max_data = [], [], []
-    number_of_features = []
-    for time in times_dt:
-        frame = timeline.get_frame(time)
-        features = frame.features
-        size_data.append([feature.get_size() for feature in features.values()])
-        mean_data.append([feature.mean for feature in features.values()])
-        max_data.append([feature.max for feature in features.values()])
-        number_of_features.append(len(features))
-
-    # Pad data with NaNs to create arrays of consistent shape (max number of features across
-    # all timesteps)
-    arr_size = max(number_of_features)
-
-    # Size data is integer, but we need to pad with NaNs (which is a float), so fill
-    # with invalid value first
-    size_data = np.array(
-        [
-            np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
-            for sizes in size_data
-        ],
-        dtype=float,
-    )
-    size_data[size_data == -100] = np.nan
-
-    # Mean and max data are already float, so can be padded with NaNs directly.
-    mean_data = np.array(
-        [
-            np.pad(means, (0, arr_size - len(means)), constant_values=np.nan)
-            for means in mean_data
+        # Create cubes
+        time_coord = cube.coord("time").copy()
+        feature_coord = iris.coords.DimCoord(
+            np.arange(arr_size),
+            long_name="feature_number",
+            var_name="feature_number",
+            units="1",
+        )
+        coords = [time_coord, feature_coord]
+        coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
+
+        # Set cube properties
+        cube_properties = {
+            "feature_size": {
+                "data": size_data,
+                "long_name": feature_size_unit,
+                "units": "1",
+            },
+            "feature_mean": {
+                "data": mean_data,
+                "long_name": "feature_mean",
+                "units": 1,
+            },
+            "feature_max": {"data": max_data, "long_name": "feature_max", "units": 1},
+        }
+        # TODO: need to consider better implementation which doesn't overwrite feature_size cube
+        # TODO: consider iris.analysis.cartography.area_weights() or hzntl_coord.is_regular()
+        # to check for regular spacing
+        # TODO: Check for hzntl_coords is empty list and handle elegantly
+        # if feature_size_unit == "feature_effective_radius":
+        #     # Convert feature size to effective radius, assuming uniform grid
+        #     # Guess coord representing horizontal grid (choose first available)
+        #     hzntl_coord = [
+        #         coord
+        #         for coord in cube.coords()
+        #         if iris.util.guess_coord_axis(coord) in ["X", "Y"]
+        #     ][0]
+        #     logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
+        #     # Convert to km if possible
+        #     # TODO: fall back to feature_size if this conversion fails
+        #     # TODO: this will still add the name "feature_effective_radius" to the feature_size
+        #     # cube, even though the data is actually pixel size. Figure out elegant solution.
+        #     try:
+        #         hzntl_coord.convert_units("km")
+        #     except:
+        #         pass
+        #     # Naive grid spacing estimate, correct for regular grids, likely wildly
+        #     # inaccruate for LFRic/irregular grids
+        #     grid_spacing = np.abs(np.diff(hzntl_coord.points).mean())
+        #     # Convert feature size in grid points to effective radius in km
+        #     size_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
+        #     # Reset cube properties
+        #     cube_properties["feature_size"]["data"] = size_data
+        #     cube_properties["feature_size"]["units"] = "km"
+
+        # Get list of coords to copy from input cube to output cubes
+        copyable_coord_names = [
+            "realization",
+            "hour",
+            "forecast_period",
+            "forecast_reference_time",
+            "model_name",
+            "cset_comparison_base",
         ]
-    )
-    max_data = np.array(
-        [
-            np.pad(maxs, (0, arr_size - len(maxs)), constant_values=np.nan)
-            for maxs in max_data
+        input_cube_coord_names = []
+        for coord in cube.coords():
+            input_cube_coord_names.append(coord.standard_name)
+            input_cube_coord_names.append(coord.long_name)
+
+        coords_to_copy = [
+            coord_name
+            for coord_name in copyable_coord_names
+            if coord_name in input_cube_coord_names
         ]
-    )
-
-    # Create cubes
-    time_coord = cube.coord("time").copy()
-    feature_coord = iris.coords.DimCoord(
-        np.arange(arr_size),
-        long_name="feature_number",
-        var_name="feature_number",
-        units="1",
-    )
-    coords = [time_coord, feature_coord]
-    coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
-    cell_stats_cubelist = iris.cube.CubeList()
-
-    # Set cube properties
-    cube_properties = {
-        "feature_size": {
-            "data": size_data,
-            "long_name": feature_size_unit,
-            "units": "1",
-        },
-        "feature_mean": {"data": mean_data, "long_name": "feature_mean", "units": 1},
-        "feature_max": {"data": max_data, "long_name": "feature_max", "units": 1},
-    }
-    if feature_size_unit == "feature_effective_radius":
-        # Conert feature size to effective radius, assuming uniform grid
-        # Guess coord representing horizontal grid (choose first available)
-        hzntl_coord = [
-            coord
-            for coord in cube.coords()
-            if iris.util.guess_coord_axis(coord) in ["X", "Y"]
-        ][0]
-        logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
-        # Convert to km if possible
-        # TODO: fall back to feature_size if this conversion fails
-        # TODO: this will still add the name "feature_effective_radius" to the feature_size
-        # cube, even though the data is actually pixel size. Figure out elegant solution.
-        try:
-            hzntl_coord.convert_units("km")
-        except:
-            pass
-        # Naive grid spacing estimate, correct for regular grids, likely wildly
-        # inaccruate for LFRic/irregular grids
-        grid_spacing = np.abs(np.diff(hzntl_coord.points).mean())
-        # Convert feature size in grid points to effective radius in km
-        size_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
-        # Reset cube properties
-        cube_properties["feature_size"]["data"] = size_data
-        cube_properties["feature_size"]["units"] = "km"
-
-    # Get list of coords to copy from input cube to output cubes
-    copyable_coord_names = [
-        "realization",
-        "hour",
-        "forecast_period",
-        "forecast_reference_time",
-        "model_name",
-        "cset_comparison_base",
-    ]
-    input_cube_coord_names = []
-    for coord in cube.coords():
-        input_cube_coord_names.append(coord.standard_name)
-        input_cube_coord_names.append(coord.long_name)
-
-    coords_to_copy = [
-        coord_name
-        for coord_name in copyable_coord_names
-        if coord_name in input_cube_coord_names
-    ]
-
-    # Populate cubelist
-    for cb_props in cube_properties.values():
-        cell_stats_cube = iris.cube.Cube(
-            data=cb_props["data"],
-            long_name=cb_props["long_name"],
-            units=cb_props["units"],
-            dim_coords_and_dims=coords_and_dims,
-        )
-        # Add other metadata from input cube
-        for coord_name in coords_to_copy:
-            coord = cube.coord(coord_name).copy()
-            # Check if this coord represents a dimension of data
-            dims = cube.coord_dims(coord)
-            if len(dims) > 1:
-                cell_stats_cube.add_aux_coord(coord, dims)
-            else:
-                cell_stats_cube.add_aux_coord(coord)
 
-        # Add to cubelist
-        cell_stats_cubelist.append(cell_stats_cube)
+        # Populate cubelist
+        for cb_props in cube_properties.values():
+            cell_stats_cube = iris.cube.Cube(
+                data=cb_props["data"],
+                long_name=cb_props["long_name"],
+                units=cb_props["units"],
+                dim_coords_and_dims=coords_and_dims,
+            )
+            # Add other metadata from input cube
+            for coord_name in coords_to_copy:
+                coord = cube.coord(coord_name).copy()
+                # Check if this coord represents a dimension of data
+                dims = cube.coord_dims(coord)
+                if len(dims) > 1:
+                    cell_stats_cube.add_aux_coord(coord, dims)
+                else:
+                    cell_stats_cube.add_aux_coord(coord)
+
+            # Add to cubelist
+            cell_stats_cubelist.append(cell_stats_cube)
 
     return cell_stats_cubelist
diff --git a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
index 300417442..2f21438eb 100755
--- a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
+++ b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
@@ -1,28 +1,25 @@
 category: Quick Look
 title: Example running cell_stats and plotting histograms
 description: |
-  Uses the feature.cell_stats operator to calculate cell size, mean cell values and max cell values for 
-  identified features. 
+  Uses the feature.cell_stats operator to calculate cell size, mean cell values and max cell values for
+  identified features.
 
 steps:
   - operator: read.read_cubes
     file_paths: $INPUT_PATHS
 
-  - operator: filters.filter_cubes
+  - operator: filters.filter_multiple_cubes
     constraint:
       operator: constraints.generate_var_constraint
-      varname: precipitation_flux
+      varname: $VARNAME
 
   - operator: feature.cell_stats
-    feature_size_unit: "feature_effective_radius"
     threshold: 3
 
-  # Filter tracking cubelist to one of "feature_mean", "feature_max", or  
-  # "feature_size/feature_effective_radius" (depending on feature_size_unit set above), 
-  - operator: filters.filter_cubes
+  # Filter tracking cubelist to one of "feature_mean", "feature_max", or "feature_size"
+  - operator: filters.filter_multiple_cubes
     constraint:
       operator: constraints.generate_var_constraint
-      varname: feature_effective_radius
-    
-  - operator: plot.plot_histogram_series
+      varname: feature_size
 
+  - operator: plot.plot_histogram_series

From 65764a747af5ada054c250c700aa7be2512f17de Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Tue, 19 May 2026 15:52:42 +0100
Subject: [PATCH 12/15] changed example cell_stats recipe to include MODEL_NAME
 input arg, fixed removal of feature_effective_radius, input cube attrs now
 copied to cell_stats_cube

---
 src/CSET/operators/feature.py                    | 16 +++++++++-------
 .../example_feature_cell_stats.yaml              |  1 +
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index f9ce014d1..006cdc7d3 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -195,7 +195,6 @@ def cell_stats(
     threshold: float,
     under_threshold: bool = False,
     min_size: int = 4,
-    feature_size_unit: str = "feature_effective_radius",
     save_data: bool = False,
 ):
     """Identify features in each timestep and output statistics.
@@ -211,11 +210,11 @@ def cell_stats(
     min_size: int, optional
         The minimum number of contiguous grid points required for a feature to be tracked.
         Default is 4.
-    feature_size_unit: str, optional
-        The unit to define feature size output. Options are "feature_effective_radius"
-        (the radius of a circle with the same area as the feature, in km) or "
-        feature_grid_points" (the number of grid points contained in the feature).
-        Default is "feature_effective_radius".
+    # feature_size_unit: str, optional
+    #     The unit to define feature size output. Options are "feature_effective_radius"
+    #     (the radius of a circle with the same area as the feature, in km) or "
+    #     feature_grid_points" (the number of grid points contained in the feature).
+    #     Default is "feature_effective_radius".
     save_data: bool, optional
         If set to True, all tracking data is saved to disk for further analysis (including csv
         and txt files containing feature properties that are not returned in output cubes).
@@ -338,7 +337,7 @@ def cell_stats(
         cube_properties = {
             "feature_size": {
                 "data": size_data,
-                "long_name": feature_size_unit,
+                "long_name": "feature_size",
                 "units": "1",
             },
             "feature_mean": {
@@ -416,6 +415,9 @@ def cell_stats(
                 else:
                     cell_stats_cube.add_aux_coord(coord)
 
+            # Copy over attributes
+            cell_stats_cube.attributes = cube.attributes
+
             # Add to cubelist
             cell_stats_cubelist.append(cell_stats_cube)
 
diff --git a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
index 2f21438eb..9acf8e381 100755
--- a/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
+++ b/src/CSET/recipes/example_recipes/example_feature_cell_stats.yaml
@@ -7,6 +7,7 @@ description: |
 steps:
   - operator: read.read_cubes
     file_paths: $INPUT_PATHS
+    model_names: $MODEL_NAME
 
   - operator: filters.filter_multiple_cubes
     constraint:

From 7ed0592c66622800f4767c9a3f3887764057a0ca Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Wed, 27 May 2026 13:36:14 +0100
Subject: [PATCH 13/15] fixed bug for assigning coord dims

---
 src/CSET/operators/feature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index 006cdc7d3..6d9e80dea 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -410,7 +410,7 @@ def cell_stats(
                 coord = cube.coord(coord_name).copy()
                 # Check if this coord represents a dimension of data
                 dims = cube.coord_dims(coord)
-                if len(dims) > 1:
+                if len(dims) > 0:
                     cell_stats_cube.add_aux_coord(coord, dims)
                 else:
                     cell_stats_cube.add_aux_coord(coord)

From 8554d53ecc02418edc6e2034896cad5f5300bce0 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Tue, 16 Jun 2026 14:06:22 +0100
Subject: [PATCH 14/15] added feature effective radius calculation

---
 src/CSET/operators/feature.py | 119 ++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 47 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index 6d9e80dea..0eba0d72f 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -210,11 +210,6 @@ def cell_stats(
     min_size: int, optional
         The minimum number of contiguous grid points required for a feature to be tracked.
         Default is 4.
-    # feature_size_unit: str, optional
-    #     The unit to define feature size output. Options are "feature_effective_radius"
-    #     (the radius of a circle with the same area as the feature, in km) or "
-    #     feature_grid_points" (the number of grid points contained in the feature).
-    #     Default is "feature_effective_radius".
     save_data: bool, optional
         If set to True, all tracking data is saved to disk for further analysis (including csv
         and txt files containing feature properties that are not returned in output cubes).
@@ -223,13 +218,15 @@ def cell_stats(
     Returns
     -------
     cell_stats_cubes: iris.cube.CubeList
-        An iris CubeList containing "feature_size", "feature_mean", and "feature_max" cubes.
+        An iris CubeList containing "feature_size", "feature_effective_radius", "feature_mean",
+        and "feature_max" cubes.
 
     Notes
     -----
     This operator uses the Simple-Track package with tracking disabled to identify features
     in each timestep and compile cell statistics. Outputs cubes containing feature size (number
-    of grid points), mean value within features, and maximum value within features.
+    of grid points), effective radius (in km), mean value within features, and maximum
+    value within features.
 
     Links
     ----------
@@ -308,6 +305,11 @@ def cell_stats(
         )
         size_data[size_data == -100] = np.nan
 
+        # Get effective radius from feature size, using horizontal coordinate of input cube to estimate grid spacing
+        effective_radius_data = _get_effective_radius_from_feature_size(
+            size_data=size_data, cube_with_hzntl_coord=cube
+        )
+
         # Mean and max data are already float, so can be padded with NaNs directly.
         mean_data = np.array(
             [
@@ -322,17 +324,6 @@ def cell_stats(
             ]
         )
 
-        # Create cubes
-        time_coord = cube.coord("time").copy()
-        feature_coord = iris.coords.DimCoord(
-            np.arange(arr_size),
-            long_name="feature_number",
-            var_name="feature_number",
-            units="1",
-        )
-        coords = [time_coord, feature_coord]
-        coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
-
         # Set cube properties
         cube_properties = {
             "feature_size": {
@@ -346,36 +337,23 @@ def cell_stats(
                 "units": 1,
             },
             "feature_max": {"data": max_data, "long_name": "feature_max", "units": 1},
+            "feature_effective_radius": {
+                "data": effective_radius_data,
+                "long_name": "feature_effective_radius",
+                "units": "km",
+            },
         }
-        # TODO: need to consider better implementation which doesn't overwrite feature_size cube
-        # TODO: consider iris.analysis.cartography.area_weights() or hzntl_coord.is_regular()
-        # to check for regular spacing
-        # TODO: Check for hzntl_coords is empty list and handle elegantly
-        # if feature_size_unit == "feature_effective_radius":
-        #     # Convert feature size to effective radius, assuming uniform grid
-        #     # Guess coord representing horizontal grid (choose first available)
-        #     hzntl_coord = [
-        #         coord
-        #         for coord in cube.coords()
-        #         if iris.util.guess_coord_axis(coord) in ["X", "Y"]
-        #     ][0]
-        #     logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
-        #     # Convert to km if possible
-        #     # TODO: fall back to feature_size if this conversion fails
-        #     # TODO: this will still add the name "feature_effective_radius" to the feature_size
-        #     # cube, even though the data is actually pixel size. Figure out elegant solution.
-        #     try:
-        #         hzntl_coord.convert_units("km")
-        #     except:
-        #         pass
-        #     # Naive grid spacing estimate, correct for regular grids, likely wildly
-        #     # inaccruate for LFRic/irregular grids
-        #     grid_spacing = np.abs(np.diff(hzntl_coord.points).mean())
-        #     # Convert feature size in grid points to effective radius in km
-        #     size_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
-        #     # Reset cube properties
-        #     cube_properties["feature_size"]["data"] = size_data
-        #     cube_properties["feature_size"]["units"] = "km"
+
+        # Create cubes
+        time_coord = cube.coord("time").copy()
+        feature_coord = iris.coords.DimCoord(
+            np.arange(arr_size),
+            long_name="feature_number",
+            var_name="feature_number",
+            units="1",
+        )
+        coords = [time_coord, feature_coord]
+        coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
 
         # Get list of coords to copy from input cube to output cubes
         copyable_coord_names = [
@@ -422,3 +400,50 @@ def cell_stats(
             cell_stats_cubelist.append(cell_stats_cube)
 
     return cell_stats_cubelist
+
+
+def _get_effective_radius_from_feature_size(
+    size_data: np.ndarray, cube_with_hzntl_coord: iris.cube.Cube
+) -> np.ndarray:
+    """Convert feature size in grid points to effective radius in km.
+
+    Parameters
+    ----------
+    size_data: np.ndarray
+        An array containing "feature_size" data, in units of grid points.
+    cube_with_hzntl_coord: iris.cube.Cube
+        An iris cube containing a horizontal coordinate, which is used to
+        estimate the grid spacing for the effective radius calculation.
+
+    Returns
+    -------
+    effective_radii_data: np.ndarray
+        An array containing "feature_effective_radius" data, in units of km.
+
+    Notes
+    -----
+    This function assumes that the input cube has a horizontal coordinate system that is regular and
+    that the grid spacing can be estimated from the horizontal coordinates. The effective radius is
+    calculated as the radius of a circle with the same area as the feature size in grid points.
+
+    """
+    # Guess coord representing horizontal grid (choose first available)
+    hzntl_coord = [
+        coord
+        for coord in cube_with_hzntl_coord.coords()
+        if iris.util.guess_coord_axis(coord) in ["X", "Y"]
+    ][0]
+    logging.debug(f"Attempting to convert to effective radius using {hzntl_coord}")
+
+    # Check coordinate is regular, but only warn if not, this is a naive estimate
+    # and will be inaccurate for irregular grids
+    if not iris.util.is_regular(hzntl_coord):
+        logging.warning(
+            f"Horizontal coordinate {hzntl_coord} is not regular. "
+            "Effective radius calculation may be inaccurate."
+        )
+
+    grid_spacing = np.abs(np.mean(np.diff(hzntl_coord.points)))
+    effective_radii_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
+
+    return effective_radii_data

From 963d808f87599d3ef74c0fad3132e223c1b18085 Mon Sep 17 00:00:00 2001
From: Adam Gainford <adam.gainford@reading.ac.uk>
Date: Tue, 16 Jun 2026 15:01:33 +0100
Subject: [PATCH 15/15] modularised cell_stats operator, added test

---
 src/CSET/operators/feature.py   | 252 ++++++++++++++++++++------------
 tests/operators/test_feature.py |  41 +++++-
 2 files changed, 195 insertions(+), 98 deletions(-)

diff --git a/src/CSET/operators/feature.py b/src/CSET/operators/feature.py
index 0eba0d72f..31a36e9b1 100755
--- a/src/CSET/operators/feature.py
+++ b/src/CSET/operators/feature.py
@@ -20,6 +20,7 @@
 import iris.cube
 import iris.util
 import numpy as np
+from simpletrack.frame import Timeline
 from simpletrack.track import Tracker
 
 from CSET._common import iter_maybe
@@ -274,62 +275,24 @@ def cell_stats(
 
         # Run tracking, returning Timeline object
         timeline = Tracker(tracker_config).run(cube_dict)
-
         logging.debug(f"Tracking completed for {model_name}")
 
-        # Get feature data from each frame of data, append to list of lists
-        # before conversion to numpy array (which requires knowledge of max
-        # number of features before constructing array shape)
-        size_data, mean_data, max_data = [], [], []
-        number_of_features = []
-        for time in times_dt:
-            frame = timeline.get_frame(time)
-            features = frame.features
-            size_data.append([feature.get_size() for feature in features.values()])
-            mean_data.append([feature.mean for feature in features.values()])
-            max_data.append([feature.max for feature in features.values()])
-            number_of_features.append(len(features))
-
-        # Pad data with NaNs to create arrays of consistent shape (max number of features across
-        # all timesteps)
-        arr_size = max(number_of_features)
-
-        # Size data is integer, but we need to pad with NaNs (which is a float), so fill
-        # with invalid value first
-        size_data = np.array(
-            [
-                np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
-                for sizes in size_data
-            ],
-            dtype=float,
+        # Get feature data from each frame of data
+        size_data, mean_data, max_data = _get_cell_stats_arrays_from_timeline(
+            timeline=timeline, expected_frame_times=times_dt
         )
-        size_data[size_data == -100] = np.nan
 
         # Get effective radius from feature size, using horizontal coordinate of input cube to estimate grid spacing
         effective_radius_data = _get_effective_radius_from_feature_size(
             size_data=size_data, cube_with_hzntl_coord=cube
         )
 
-        # Mean and max data are already float, so can be padded with NaNs directly.
-        mean_data = np.array(
-            [
-                np.pad(means, (0, arr_size - len(means)), constant_values=np.nan)
-                for means in mean_data
-            ]
-        )
-        max_data = np.array(
-            [
-                np.pad(maxs, (0, arr_size - len(maxs)), constant_values=np.nan)
-                for maxs in max_data
-            ]
-        )
-
-        # Set cube properties
+        # Set output cube properties
         cube_properties = {
             "feature_size": {
                 "data": size_data,
                 "long_name": "feature_size",
-                "units": "1",
+                "units": 1,
             },
             "feature_mean": {
                 "data": mean_data,
@@ -344,62 +307,75 @@ def cell_stats(
             },
         }
 
-        # Create cubes
-        time_coord = cube.coord("time").copy()
-        feature_coord = iris.coords.DimCoord(
-            np.arange(arr_size),
-            long_name="feature_number",
-            var_name="feature_number",
-            units="1",
+        # Create cubes, add to existing cubelist
+        cell_stats_cubelist.extend(
+            _add_cell_stats_data_to_cubes(
+                data_and_metadata_dict=cube_properties, template_cube=cube
+            )
         )
-        coords = [time_coord, feature_coord]
-        coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
-
-        # Get list of coords to copy from input cube to output cubes
-        copyable_coord_names = [
-            "realization",
-            "hour",
-            "forecast_period",
-            "forecast_reference_time",
-            "model_name",
-            "cset_comparison_base",
+
+    return cell_stats_cubelist
+
+
+def _get_cell_stats_arrays_from_timeline(
+    timeline: Timeline, expected_frame_times: list
+) -> list[np.ndarray]:
+    """Extract cell statistics data from a Simple-Track Timeline object.
+
+    Parameters
+    ----------
+    timeline: Timeline
+        A Simple-Track Timeline object containing tracked features.
+
+    Returns
+    -------
+    size_data: np.ndarray
+        A numpy array containing the size of each feature in grid points.
+    mean_data: np.ndarray
+        A numpy array containing the mean value of each feature.
+    max_data: np.ndarray
+        A numpy array containing the maximum value of each feature.
+    """
+    size_data, mean_data, max_data = [], [], []
+    number_of_features = []
+    for time in expected_frame_times:
+        frame = timeline.get_frame(time)
+        features = frame.features
+        size_data.append([feature.get_size() for feature in features.values()])
+        mean_data.append([feature.mean for feature in features.values()])
+        max_data.append([feature.max for feature in features.values()])
+        number_of_features.append(len(features))
+
+    # Pad data with NaNs to create arrays of consistent shape (max number of features across
+    # all timesteps)
+    arr_size = max(number_of_features)
+
+    # Size data is integer, but we need to pad with NaNs (which is a float), so fill
+    # with invalid value first
+    size_data = np.array(
+        [
+            np.pad(sizes, (0, arr_size - len(sizes)), constant_values=-100)
+            for sizes in size_data
+        ],
+        dtype=float,
+    )
+    size_data[size_data == -100] = np.nan
+
+    # Mean and max data are already float, so can be padded with NaNs directly.
+    mean_data = np.array(
+        [
+            np.pad(means, (0, arr_size - len(means)), constant_values=np.nan)
+            for means in mean_data
         ]
-        input_cube_coord_names = []
-        for coord in cube.coords():
-            input_cube_coord_names.append(coord.standard_name)
-            input_cube_coord_names.append(coord.long_name)
-
-        coords_to_copy = [
-            coord_name
-            for coord_name in copyable_coord_names
-            if coord_name in input_cube_coord_names
+    )
+    max_data = np.array(
+        [
+            np.pad(maxs, (0, arr_size - len(maxs)), constant_values=np.nan)
+            for maxs in max_data
         ]
+    )
 
-        # Populate cubelist
-        for cb_props in cube_properties.values():
-            cell_stats_cube = iris.cube.Cube(
-                data=cb_props["data"],
-                long_name=cb_props["long_name"],
-                units=cb_props["units"],
-                dim_coords_and_dims=coords_and_dims,
-            )
-            # Add other metadata from input cube
-            for coord_name in coords_to_copy:
-                coord = cube.coord(coord_name).copy()
-                # Check if this coord represents a dimension of data
-                dims = cube.coord_dims(coord)
-                if len(dims) > 0:
-                    cell_stats_cube.add_aux_coord(coord, dims)
-                else:
-                    cell_stats_cube.add_aux_coord(coord)
-
-            # Copy over attributes
-            cell_stats_cube.attributes = cube.attributes
-
-            # Add to cubelist
-            cell_stats_cubelist.append(cell_stats_cube)
-
-    return cell_stats_cubelist
+    return size_data, mean_data, max_data
 
 
 def _get_effective_radius_from_feature_size(
@@ -447,3 +423,89 @@ def _get_effective_radius_from_feature_size(
     effective_radii_data = np.sqrt(size_data * grid_spacing**2 / np.pi)
 
     return effective_radii_data
+
+
+def _add_cell_stats_data_to_cubes(
+    data_and_metadata_dict: dict, template_cube: iris.cube.Cube
+) -> iris.cube.CubeList:
+    """Add data to cubes, using template cube for metadata.
+
+    Parameters
+    ----------
+    data_and_metadata_dict: dict
+        A dictionary containing data and metadata for each cube to be created.
+        The keys are the long names of the cubes, and the values are dictionaries
+        containing the data and units for each cube.
+
+    template_cube: iris.cube.Cube
+        An iris cube to use as a template for the new cubes. The new cubes will
+        have the same attributes as the template cube.
+
+    Returns
+    -------
+    cubelist: iris.cube.CubeList
+        A list of iris cubes containing the added data.
+
+    """
+    cubelist = iris.cube.CubeList()
+
+    # Construct coordinates for new cubes
+    time_coord = template_cube.coord("time").copy()
+    # To construct feature coordinate, look at the size of dimension 1 for each data
+    arr_size = max(
+        [data_and_metadata_dict[cb]["data"].shape[1] for cb in data_and_metadata_dict]
+    )
+    feature_coord = iris.coords.DimCoord(
+        np.arange(arr_size),
+        long_name="feature_number",
+        var_name="feature_number",
+        units="1",
+    )
+    coords = [time_coord, feature_coord]
+    coords_and_dims = [(coord, i) for i, coord in enumerate(coords)]
+
+    # Get list of coords to copy from input cube to output cubes
+    copyable_coord_names = [
+        "realization",
+        "hour",
+        "forecast_period",
+        "forecast_reference_time",
+        "model_name",
+        "cset_comparison_base",
+    ]
+    input_cube_coord_names = []
+    for coord in template_cube.coords():
+        input_cube_coord_names.append(coord.standard_name)
+        input_cube_coord_names.append(coord.long_name)
+
+    coords_to_copy = [
+        coord_name
+        for coord_name in copyable_coord_names
+        if coord_name in input_cube_coord_names
+    ]
+
+    # Populate cubelist
+    for cb_props in data_and_metadata_dict.values():
+        cell_stats_cube = iris.cube.Cube(
+            data=cb_props["data"],
+            long_name=cb_props["long_name"],
+            units=cb_props["units"],
+            dim_coords_and_dims=coords_and_dims,
+        )
+        # Add other metadata from input cube
+        for coord_name in coords_to_copy:
+            coord = template_cube.coord(coord_name).copy()
+            # Check if this coord represents a dimension of data
+            dims = template_cube.coord_dims(coord)
+            if len(dims) > 0:
+                cell_stats_cube.add_aux_coord(coord, dims)
+            else:
+                cell_stats_cube.add_aux_coord(coord)
+
+        # Copy over attributes
+        cell_stats_cube.attributes = template_cube.attributes
+
+        # Add to cubelist
+        cubelist.append(cell_stats_cube)
+
+    return cubelist
diff --git a/tests/operators/test_feature.py b/tests/operators/test_feature.py
index 4d2565a7c..0ea13ebdf 100644
--- a/tests/operators/test_feature.py
+++ b/tests/operators/test_feature.py
@@ -30,9 +30,9 @@
 def feature_cube() -> iris.cube.Cube:
     """Set up three timesteps of data and place into cube."""
     data_arr = np.zeros((3, 10, 10))
-    data_arr[0, 2:6, 2:6] = 1
-    data_arr[1, 3:7, 3:7] = 1
-    data_arr[2, 4:8, 4:8] = 1
+    data_arr[0, 2:6, 2:6] = 5
+    data_arr[1, 3:7, 3:7] = 10
+    data_arr[2, 4:8, 4:8] = 20
 
     time_units = cf_units.Unit("days since 2000-01-01 00:00:00", calendar="gregorian")
     time_start = dt.datetime(2010, 1, 1, 0, 0, 0)
@@ -127,3 +127,38 @@ def test_save_data(feature_cube, tmp_path) -> None:
     # Check expected csv file is created in output directory
     expected_file = f"{output_directory}/frame_20100101_0000.csv"
     assert os.path.isfile(expected_file)
+
+
+def test_cell_stats_operator(feature_cube):
+    """
+    Test the cell_stats operator returns expected size, mean, and max values.
+
+    The expected values are based on the feature_cube data and the threshold of 0.5.
+    """
+    threshold = 0.5
+    min_size = 1
+    cubelist = feature.cell_stats(
+        cubes=feature_cube, threshold=threshold, min_size=min_size
+    )
+
+    # Extract data from cubelist, squeeze since there is only one feature per timestep
+    # in this test case
+    size_data = np.squeeze(cubelist.extract_cube("feature_size").data)
+    mean_data = np.squeeze(cubelist.extract_cube("feature_mean").data)
+    max_data = np.squeeze(cubelist.extract_cube("feature_max").data)
+    effective_radius_data = np.squeeze(
+        cubelist.extract_cube("feature_effective_radius").data
+    )
+
+    # Expected values based on the feature_cube data
+    expected_size_data = np.array([16, 16, 16])  # Each feature is a 4x4 square
+    expected_mean_data = np.array([5.0, 10.0, 20.0])  # Mean values of each feature
+    expected_max_data = np.array([5.0, 10.0, 20.0])  # Max values of each feature
+
+    grid_spacing = 10  # Assuming grid spacing is 10 meters from test setup
+    expected_radius_data = np.sqrt(expected_size_data * grid_spacing**2 / np.pi)
+
+    np.testing.assert_array_equal(size_data, expected_size_data)
+    np.testing.assert_array_equal(mean_data, expected_mean_data)
+    np.testing.assert_array_equal(max_data, expected_max_data)
+    np.testing.assert_array_equal(effective_radius_data, expected_radius_data)