diff --git a/accelforge/frontend/arch/_flattened_arch.py b/accelforge/frontend/arch/_flattened_arch.py
index 2567b278..15e60cb8 100644
--- a/accelforge/frontend/arch/_flattened_arch.py
+++ b/accelforge/frontend/arch/_flattened_arch.py
@@ -1,3 +1,12 @@
+from typing import TypeVar, Callable
+
+
+_FIND_SENTINEL = object()  # "no default supplied" marker, so that None stays a usable default
+
+D = TypeVar("D")  # type of a caller-supplied `default`
+T = TypeVar("T")  # type of the node being searched for
+
+
 class FlattenedArch:
     """
     A flattened arch is an architecture spec that has been
@@ -52,3 +61,124 @@ def is_above(self, name_a: str, name_b: str):
         idx_a = self.index(name_a)
         idx_b = self.index(name_b)
         return idx_a < idx_b
+
+    def find_first_of_type_between(
+        self,
+        node_type: type[T],
+        name_lower: str,
+        name_upper: str,
+        default: D = _FIND_SENTINEL,
+        top_bottom: bool = True,
+    ) -> T | D:
+        """
+        Returns the first node of type `node_type` strictly below `name_upper` and above `name_lower`.
+
+        Scans top-to-bottom when `top_bottom` is True, else bottom-to-top. Names are resolved by `self.index`.
+
+        If no node of `node_type` is found, either `default` is
+        returned (if provided) or a `ValueError` is raised.
+        """
+        upper_idx = self.index(name_upper)
+        lower_idx = self.index(name_lower)
+
+        # Keep each node paired with its top-down index so the bounds hold in both scan directions.
+        indexed_nodes = list(enumerate(self.nodes))
+        if not top_bottom:
+            indexed_nodes.reverse()
+        for i, node in indexed_nodes:
+            if upper_idx < i < lower_idx and isinstance(node, node_type):
+                return node
+        if default is not _FIND_SENTINEL:
+            return default
+        raise ValueError(
+            f"node with type {node_type} between {name_upper} and {name_lower} not found"
+        )
+
+    def find_first_of_type_above(
+        self,
+        node_type: type[T],
+        name_lower: str,
+        default: D = _FIND_SENTINEL,
+        top_bottom: bool = True,
+    ) -> T | D:
+        """
+        Returns the first node of type `node_type` strictly above `name_lower`.
+
+        Scans top-to-bottom (topmost match) when `top_bottom` is True, else bottom-to-top (nearest match).
+
+        If no node of `node_type` is found, either `default` is
+        returned (if provided) or a `ValueError` is raised.
+        """
+        lower_idx = self.index(name_lower)
+
+        # Keep each node paired with its top-down index so the bound holds in both scan directions.
+        indexed_nodes = list(enumerate(self.nodes))
+        if not top_bottom:
+            indexed_nodes.reverse()
+        for i, node in indexed_nodes:
+            if i < lower_idx and isinstance(node, node_type):
+                return node
+        if default is not _FIND_SENTINEL:
+            return default
+        raise ValueError(
+            f"node with type {node_type} above {name_lower} not found"
+        )
+
+    def find_first_of_type_below(
+        self,
+        node_type: type[T],
+        name_upper: str,
+        default: D = _FIND_SENTINEL,
+        top_bottom: bool = True,
+    ) -> T | D:
+        """
+        Returns the first node of type `node_type` strictly below `name_upper`.
+
+        Scans top-to-bottom (nearest match) when `top_bottom` is True, else bottom-to-top (bottommost match).
+
+        If no node of `node_type` is found, either `default` is
+        returned (if provided) or a `ValueError` is raised.
+        """
+        upper_idx = self.index(name_upper)
+
+        # Keep each node paired with its top-down index so the bound holds in both scan directions.
+        indexed_nodes = list(enumerate(self.nodes))
+        if not top_bottom:
+            indexed_nodes.reverse()
+        for i, node in indexed_nodes:
+            if i > upper_idx and isinstance(node, node_type):
+                return node
+        if default is not _FIND_SENTINEL:
+            return default
+        raise ValueError(
+            f"node with type {node_type} below {name_upper} not found"
+        )
+
+    def first_below(
+        self,
+        name: str,
+        filter: Callable | None = None,
+        default: D = _FIND_SENTINEL,
+    ) -> T | D:
+        """
+        Returns the first node strictly below `name` (scanning top-to-bottom) that `filter` accepts.
+
+        `filter` is called with a node and returns a truthy value to accept it; None accepts any node.
+
+        If no such node is found, either `default` is
+        returned (if provided) or a `ValueError` is raised.
+        """
+        idx = self.index(name)
+
+        # A missing predicate accepts every node.
+        accepts = filter if filter is not None else (lambda _node: True)
+
+        for i, node in enumerate(self.nodes):
+            # Check the position first so the predicate only runs on nodes below `name`.
+            if i <= idx:
+                continue
+            if accepts(node):
+                return node
+        if default is not _FIND_SENTINEL:
+            return default
+        raise ValueError(f"node below {name} not found")
diff --git a/accelforge/frontend/arch/components.py b/accelforge/frontend/arch/components.py
index f30360ae..867cb03b 100644
--- a/accelforge/frontend/arch/components.py
+++ b/accelforge/frontend/arch/components.py
@@ -145,7 +145,7 @@ def _set_n_calls(self, value: int | float) -> None:
     @classmethod
     def _deprecate_latency_fields(cls, data):
         if isinstance(data, dict):
-            if "latency" in data:
+            if "latency" in data and not "throughput" in data:
                 l = data.pop("latency")
                 warnings.warn(
                     f"Setting `latency` on `{cls.__name__}` is deprecated; use "
@@ -155,16 +155,11 @@ def _deprecate_latency_fields(cls, data):
                     DeprecationWarning,
                     stacklevel=2,
                 )
-                if "throughput" in data:
-                    raise ValueError(
-                        f"Cannot specify both `latency` and `throughput` on "
-                        f"`{cls.__name__}`. Drop the deprecated `latency` field."
-                    )
                 l = str(l).strip()
                 data["throughput"] = (
                     f"1 / ({l}) if ({l}) != 0 else float('inf')"
                 )
-            if "latency_scale" in data:
+            if "latency_scale" in data and not "throughput_scale" in data:
                 ls = data.pop("latency_scale")
                 warnings.warn(
                     f"Setting `latency_scale` on `{cls.__name__}` is deprecated; use "
@@ -174,11 +169,6 @@ def _deprecate_latency_fields(cls, data):
                     DeprecationWarning,
                     stacklevel=2,
                 )
-                if "throughput_scale" in data:
-                    raise ValueError(
-                        f"Cannot specify both `latency_scale` and `throughput_scale` "
-                        f"on `{cls.__name__}`. Drop the deprecated `latency_scale`."
-                    )
                 ls = str(ls).strip()
                 data["throughput_scale"] = (
                     f"1 / ({ls}) if ({ls}) != 0 else float('inf')"
@@ -1304,8 +1294,9 @@ def _render_node_color(self) -> str:
         return "#E0EEFF"
 
 
-class TopologySpec(str, enum.Enum):
+class TopologySpec(enum.StrEnum):  # NOTE: enum.StrEnum requires Python >= 3.11
     MESH = "mesh"
+    ALL_TO_ALL = "all_to_all"  # costed by AllToAllTopologyModel via TOPOLOGY_MODELS
 
 
 class Network(Component, Leaf):
@@ -1316,6 +1307,20 @@ class Network(Component, Leaf):
     of the spatial nodes from top to bottom.
     """
 
+    total_latency: str | int | float = "max(max_hops*actions['hops'].latency, max_link_traffic/actions['hops'].throughput)"
+    """
+    Models latency as either:
+    - *Latency-bound*, which means that the latency of the route with the largest number of
+      hops dominates the overall communication latency.
+    - *Bandwidth-bound*, which means that the traffic over the most congested link
+      dominates the overall communication latency.
+
+    Keywords:
+    - `max_hops` returns the number of hops in the longest route.
+    - `max_link_traffic` returns the amount of traffic (in bits) over the most congested
+      link.
+    """
+
     bits_per_value: EvalsTo[dict] = {}
     """
     Sets the bits per value for tensors in this `TensorHolder`. Keys are evaluated as
diff --git a/accelforge/frontend/arch/spatialable.py b/accelforge/frontend/arch/spatialable.py
index 44fa23f5..0b767302 100644
--- a/accelforge/frontend/arch/spatialable.py
+++ b/accelforge/frontend/arch/spatialable.py
@@ -93,6 +93,19 @@ def _eval_expressions(self, *args, **kwargs):
         return super(self.__class__, self)._eval_expressions(*args, **kwargs)
 
 
+class PhysicalSpatial(EvalableModel):
+    """One physical spatial fanout dimension recorded on a flattened-arch node."""
+
+    name: str
+    """The name of the dimension over which this fanout occurs (e.g., X or Y)."""
+
+    fanout: EvalsTo[int]
+    """The size of this fanout."""
+
+    stride: EvalsTo[int]
+    """The number of array coordinates between each spatial fanout coordinate."""
+
+
 class Spatialable(EvalableModel):
     """Something that can be duplicated to create an array of."""
 
@@ -107,7 +120,7 @@ class Spatialable(EvalableModel):
     specified at this level also apply to lower-level `Leaf` nodes in the architecture.
     """
 
-    _physical_spatial: NoParse[Spatial] = EvalableList()
+    _physical_spatial: NoParse[PhysicalSpatial] = EvalableList()
     """
     The physical spatial fanout of this node. Should only have a value for a
     flattened arch. Otherwise, the `spatial` attribute is authoritative.
@@ -123,14 +136,29 @@ def get_fanout_along(self, dim_name: str, default: int = 1) -> int:
                 return s.fanout
         return default
 
+    def _has_physical_dim(self, dim_name: str) -> bool:
+        """Return whether a physical spatial entry named `dim_name` exists."""
+        return any(
+            entry.name == dim_name for entry in self._physical_spatial
+        )
+
     def _get_physical_fanout_along(self, dim_name: str, default: int = 1) -> int:
         for s in self._physical_spatial:
             if s.name == dim_name:
                 return s.fanout
         return default
 
+    def _get_physical_stride_along(self, dim_name: str) -> int:
+        """Return the stride of physical dimension `dim_name`; ValueError if absent."""
+        for stride in (e.stride for e in self._physical_spatial if e.name == dim_name):
+            return stride
+        raise ValueError(f"dimension {dim_name} not found")
+
     def _spatial_str(self, include_newline=True) -> str:
         if not self.spatial:
             return ""
         result = ", ".join(f"{s.fanout}× {s.name}" for s in self.spatial)
         return f"\n[{result}]" if include_newline else result
+
+    def _is_distributed(self) -> bool:  # True when any physical dimension has fanout > 1
+        return any(s.fanout > 1 for s in self._physical_spatial)
diff --git a/accelforge/frontend/arch/structure.py b/accelforge/frontend/arch/structure.py
index 157a385a..8ceb7370 100644
--- a/accelforge/frontend/arch/structure.py
+++ b/accelforge/frontend/arch/structure.py
@@ -20,7 +20,7 @@
 
 from accelforge.util.exceptions import EvaluationError
 
-from accelforge.frontend.arch.spatialable import Spatialable
+from accelforge.frontend.arch.spatialable import Spatialable, PhysicalSpatial
 from accelforge.frontend.arch._flattened_arch import FlattenedArch
 
 from pydantic import Discriminator
@@ -334,6 +334,10 @@ def _flatten(
 
         nodes = []
 
+        # Nodes inside an array are flattened to fit into a hierarchical
+        # model in order to map.
+        # However, we will keep information about how these nodes are
+        # arranged for modeling.
         for node in self.nodes:
             try:
                 if isinstance(node, Branch):
@@ -342,7 +346,14 @@ def _flatten(
                     if isinstance(node, Spatialable):
                         fanout *= node.get_fanout()
                         node = deepcopy(node)
-                        node._physical_spatial = node.spatial
+                        node._physical_spatial = [
+                            PhysicalSpatial(
+                                name=s.name,
+                                fanout=s.fanout,
+                                stride=self.get_fanout_along(s.name)/s.fanout
+                            )
+                            for s in node.spatial
+                        ]
                         node.spatial = EvalableList()
                     nodes.append(node)
                 else:
diff --git a/accelforge/model/_looptree/latency/memory.py b/accelforge/model/_looptree/latency/memory.py
index c96ec9b1..080e3c04 100755
--- a/accelforge/model/_looptree/latency/memory.py
+++ b/accelforge/model/_looptree/latency/memory.py
@@ -14,7 +14,7 @@
 
 from accelforge.model._looptree.reuse.symbolic import BuffetStats
 from accelforge.util._eval_expressions import MATH_FUNCS, eval_expression
-from accelforge.util._sympy.broadcast_max import Max, Min
+from accelforge.util._sympy.broadcast_max import Max, Min, MaxGeqZero
 from accelforge.util._basetypes import EvalableList
 import symengine as se
 
@@ -71,6 +71,10 @@ def component_latency(
     component_to_actions: dict[str, dict[str, float]] = defaultdict(
         lambda: defaultdict(lambda: 0)
     )
+    # Holds "keywords" that do not map neatly to actions, e.g., max_hops for network
+    component_to_keywords: dict[str, dict[str, float]] = defaultdict(
+        lambda: defaultdict(lambda: 0)
+    )
     name2component: dict[str, Component] = {node.name: node for node in flattened_arch}
 
     compute_obj = flattened_arch[-1]
@@ -103,6 +107,30 @@ def component_latency(
                 f"Component {component} is not a TensorHolder or Compute"
             )
 
+    network_to_max_link_traffic = defaultdict(lambda: defaultdict(lambda: 0))
+    network_to_max_hops = defaultdict(lambda: [])
+    # Aggregates across tensors
+    for network, network_stats in looptree_results.network_stats.items():
+        component = network.component
+        if component not in name2component:
+            raise ValueError(f"Component {component} found in mapping but not arch")
+
+        dim_traffic = network_to_max_link_traffic[component]
+        for dim, max_traffic_in_dim in network_stats.max_traffic.items():
+            dim_traffic[dim] += max_traffic_in_dim
+
+        network_to_max_hops[component].append(network_stats.max_hops)
+
+    for network, network_stats in looptree_results.network_stats.items():
+        component = network.component
+        keywords = component_to_keywords[component]
+        keywords["max_link_traffic"] = MaxGeqZero(
+            *network_to_max_link_traffic[component].values()
+        )
+        keywords["max_hops"] = MaxGeqZero(
+            *network_to_max_hops[component]
+        )
+
     longest_compute_latency = Max(
         0, *[s.max_latency for s in looptree_results.compute_stats.values()]
     )
@@ -138,13 +166,18 @@ def component_latency(
         "sum": _sum,
     }
 
-    for component in component_to_actions:
+    for component in name2component:
+        if component not in component_to_actions and component not in component_to_keywords:
+            continue
         component_obj = name2component[component]
         dump = component_obj.shallow_model_dump(include_None=True)
         # Replace serialized `actions` dump with local Action copies that carry
         # the correct n_calls for this job, so formulas can access `a.n_calls`,
         # `a.throughput`, etc. without mutating the shared spec state.
-        dump["actions"] = component_to_actions[component]
+        if component in component_to_actions:
+            dump["actions"] = component_to_actions[component]
+        if component in component_to_keywords:
+            dump |= component_to_keywords[component]
         symbol_table = {**symbol_table_base, **dump}
         if component_obj.total_latency is not None:
             component_latency[component] = eval_expression(
diff --git a/accelforge/model/_looptree/reuse/symbolic/_network.py b/accelforge/model/_looptree/reuse/symbolic/_network.py
index aec14fe6..0c833354 100644
--- a/accelforge/model/_looptree/reuse/symbolic/_network.py
+++ b/accelforge/model/_looptree/reuse/symbolic/_network.py
@@ -1,12 +1,11 @@
-import copy
-from accelforge.frontend.arch import Network as NetworkSpec
-from accelforge.frontend.mapping import (
-    Spatial,
-)
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any
 
 from accelforge.frontend.mapping import (
-    Spatial,
+    Spatial
 )
+from accelforge.frontend.arch.components import TopologySpec
 from accelforge.frontend._workload_isl._symbolic import (
     compute_dense_tile_occupancy,
     Irrelevant,
@@ -14,94 +13,341 @@
     PartiallyRelevant,
 )
 
-from accelforge.util._sympy.broadcast_max import Min, Max, MaxGeqZero
+from accelforge.util._sympy.broadcast_max import MaxGeqZero, MinGeqZero
 
 from ._common import AnalysisInfo
-from ._stats import NetworkStats
+from ._stats import NetworkStats, SymbolicAnalysisOutput
+
+
+@dataclass
+class PerLoopTransferCost:
+    """Cost that one spatial loop adds to a single network, as returned by
+    :meth:`TopologyModel.per_loop_transfer_cost`."""
+
+    total_cost: Any
+    """Total hops contributed by data movement over this spatial loop (hops × volume)."""
+    max_hops: Any
+    """Hops added to the longest route by this spatial loop."""
+    max_traffic: Any
+    """Maximum traffic (in actions) on any single link along this dimension."""
+
+
+class TopologyModel(ABC):
+    """Abstract cost model for moving data across a network of one topology.
+
+    Everything topology-specific about delivering a tensor's data across a
+    spatial fanout lives in a subclass. :class:`NetworkAnalyzer` picks the
+    model for each network from its component's
+    :class:`~accelforge.frontend.arch.components.TopologySpec` and otherwise
+    stays agnostic to the topology.
+
+    A model is stateful: it keeps a per-network running total of max hops over
+    the repeated spatial-loop iterations of one :class:`NetworkAnalyzer`, which
+    is why :func:`get_topology_model` constructs a new instance on every call.
+    """
+
+    def __init__(self):
+        # network -> max hops summed so far over the spatial-loop iterations
+        # that the owning NetworkAnalyzer has processed.
+        self.overall_max_hops: dict = {}
+
+    def accumulate_max_hops(self, network, max_hops):
+        """Fold this loop's ``max_hops`` into ``network``'s running total.
+
+        Every :meth:`NetworkAnalyzer.accumulate_child_result` call (one per
+        spatial-loop iteration) contributes further hops to the total.
+
+        Returns the updated total.
+        """
+        running_total = self.overall_max_hops.get(network, 0) + max_hops
+        self.overall_max_hops[network] = running_total
+        return running_total
+
+    @abstractmethod
+    def per_loop_transfer_cost(
+        self,
+        relevancy,
+        *,
+        shape_repeats,
+        last_fanout,
+        volume,
+        src_component,
+        dim_name: str,
+    ) -> PerLoopTransferCost:
+        """Compute the :class:`PerLoopTransferCost` of moving ``volume`` of data
+        across a single spatial loop.
+
+        Args:
+            relevancy: How the spatial loop's rank variable relates to the tensor
+                (``Irrelevant``, ``Relevant``, or ``PartiallyRelevant``).
+            shape_repeats: Iteration count of this spatial loop.
+            last_fanout: Fanout in this dimension among the mapping nodes below,
+                i.e., the stride.
+            volume: Data volume (in actions) moved per destination.
+            src_component: Flattened-arch component that sources the data;
+                queried for physical fanout/stride.
+            dim_name: Name of the spatial dimension (e.g., ``X`` or ``Y``).
+        """
+        raise NotImplementedError
+
+
+class MeshTopologyModel(TopologyModel):
+    """Cost model for a mesh network.
+
+    Data travels along one axis of the mesh. Multicast delivers one value to
+    every point along the dimension; unicast delivers a distinct value to each
+    point. When the source is physically distributed along the dimension, data
+    is bound as locally as possible across the physical buffers.
+    """
+
+    def per_loop_transfer_cost(
+        self,
+        relevancy,
+        *,
+        shape_repeats,
+        last_fanout,
+        volume,
+        src_component,
+        dim_name: str,
+    ) -> PerLoopTransferCost:
+        if isinstance(relevancy, Irrelevant):
+            # Multicast travels link by link along one mesh axis; distributed or not, the
+            # total cost is the same (only the physical memories being accessed differ).
+            # NOTE(review): max_hops uses n*stride while total_cost uses (n-1)*stride — confirm.
+            total_cost = multicast_cost(shape_repeats, last_fanout) * volume
+            max_hops = shape_repeats * last_fanout
+            max_traffic = volume
+        elif isinstance(relevancy, Relevant):
+            # Unicast. If the source is physically distributed along this dimension,
+            # bind data as locally as possible in the physical buffers.
+            if src_component._get_physical_fanout_along(dim_name) > 1:
+                physical_stride = src_component._get_physical_stride_along(dim_name)
+
+                n_dsts_per_physical = MinGeqZero(
+                    # If last_fanout > physical_stride, clamp n_dsts to 1, which yields 0 hops
+                    # below (correct, because the set of destinations always overlaps
+                    # the set of sources).
+                    MaxGeqZero(physical_stride / last_fanout, 1),
+                    shape_repeats
+                )
+                n_activated_physical = MaxGeqZero(shape_repeats * last_fanout / physical_stride, 1)
+                total_cost = (
+                    n_activated_physical
+                    *
+                    unicast_cost(n_dsts_per_physical, last_fanout)
+                    *
+                    volume
+                )
+                max_hops = MinGeqZero((n_dsts_per_physical - 1) * last_fanout, physical_stride)
+                max_traffic = (n_dsts_per_physical - 1) * volume
+            else:
+                total_cost = unicast_cost(shape_repeats, last_fanout) * volume
+                max_hops = shape_repeats * last_fanout
+                max_traffic = (shape_repeats - 1) * volume
+        elif isinstance(relevancy, PartiallyRelevant):
+            raise NotImplementedError()
+        else:
+            raise RuntimeError(f"unhandled relevancy type {relevancy}")
+
+        return PerLoopTransferCost(total_cost=total_cost, max_hops=max_hops, max_traffic=max_traffic)
+
+
+class AllToAllTopologyModel(TopologyModel):
+    """Cost model for an all-to-all network built around a switch (e.g. NVLink).
+
+    Every node is attached to the switch, so any source reaches any destination
+    in a single hop regardless of where the two sit in the array.
+
+    Distance therefore plays no role: ``last_fanout``, ``src_component`` and
+    ``dim_name`` are accepted for interface compatibility but never read.
+    """
+
+    HOPS_PER_TRANSFER = 1
+    """Hops charged per source-to-destination transfer: one switch traversal
+    counts as one hop, with per-hop energy and latency taken from the network
+    component's ``hops`` action."""
+
+    def per_loop_transfer_cost(
+        self,
+        relevancy,
+        *,
+        shape_repeats,
+        last_fanout,
+        volume,
+        src_component,
+        dim_name,
+    ) -> PerLoopTransferCost:
+        hops_per_route = self.HOPS_PER_TRANSFER
+        # The source already holds the data (destinations overlap sources), so
+        # only the other n - 1 instances receive it through the switch.
+        n_receivers = shape_repeats - 1
+
+        is_multicast = isinstance(relevancy, Irrelevant)
+        if not (is_multicast or isinstance(relevancy, Relevant)):
+            if isinstance(relevancy, PartiallyRelevant):
+                raise NotImplementedError()
+            raise RuntimeError(f"unhandled relevancy type {relevancy}")
+
+        # Shared (multicast) or distinct (unicast) data costs the same number of
+        # deliveries, hence the same energy: each receiver is one switch
+        # traversal away.
+        total_cost = n_receivers * hops_per_route * volume
+
+        if is_multicast:
+            # The switch replicates the value, so no link carries it more than once.
+            busiest_link_traffic = volume
+        else:
+            # The source's uplink carries all n - 1 distinct messages, making it
+            # the most congested link.
+            busiest_link_traffic = n_receivers * volume
+
+        return PerLoopTransferCost(
+            total_cost=total_cost,
+            # Every route is exactly one switch traversal, whatever the distance.
+            max_hops=hops_per_route,
+            max_traffic=busiest_link_traffic,
+        )
+
+
+# Registry mapping each supported TopologySpec to the TopologyModel subclass that costs it
+TOPOLOGY_MODELS: dict[TopologySpec, type[TopologyModel]] = {
+    TopologySpec.MESH: MeshTopologyModel,
+    TopologySpec.ALL_TO_ALL: AllToAllTopologyModel,
+}
+
+
+def get_topology_model(topology) -> TopologyModel:
+    """Return a newly constructed :class:`TopologyModel` for ``topology`` (a key of ``TOPOLOGY_MODELS``)."""
+    return TOPOLOGY_MODELS[topology]()
 
 
 class NetworkAnalyzer:
-    def __init__(self, network_stats):
-        self.overall_max_hops = 0
+    def __init__(self, network_stats, info: AnalysisInfo, einsum_name, node: Spatial):
         self.network_stats = network_stats
+        # Fixed per analyzer: identical for every accumulate_child_result call.
+        self.info = info
+        self.einsum_name = einsum_name
+        self.node = node
+        # Each network gets its own topology model, since different networks may
+        # have different topologies. Models are constructed lazily, the first
+        # time a network needs costing, and reused for the analyzer's lifetime so
+        # their accumulated max hops persist.
+        self.topology_models: dict = {}
+
+    def _get_topology_model(self, network, topology) -> TopologyModel:
+        if network not in self.topology_models:
+            self.topology_models[network] = get_topology_model(topology)
+        return self.topology_models[network]
 
     def accumulate_child_result(
         self,
-        child_result,
-        info: AnalysisInfo,
+        child_result: SymbolicAnalysisOutput,
         shape_repeats,
-        einsum_name,
         child_shape,
-        node,
     ):
+        """Fold ``child_result``'s network stats into ``self.network_stats``; called for
+        every repeated shape. Returns a dict of accumulated max hops keyed by network."""
+        flattened_arch = self.info.job.flattened_arch
         for network, child_network_stats in child_result.network_stats.items():
+            src_component = flattened_arch[network.source.level]
             if network not in self.network_stats:
                 self.network_stats[network] = NetworkStats()
             accumulated_network_stats = self.network_stats[network]
 
+            # The spatial loop belongs to a component above this network: only roll the
+            # child's stats up (no per-loop transfer cost is computed in this branch).
+            if flattened_arch.is_above(self.node.component, network.component):
+                accumulated_network_stats.total_hops += (
+                    child_network_stats.total_hops * shape_repeats
+                )
+                accumulated_network_stats.max_hops = MaxGeqZero(
+                    accumulated_network_stats.max_hops,
+                    child_network_stats.max_hops,
+                )
+                for k, v in child_network_stats.max_traffic.items():
+                    accumulated_network_stats.max_traffic[k] = MaxGeqZero(
+                        accumulated_network_stats.max_traffic.get(k, 0),
+                        v
+                    )
+                continue
+
+            volume = self._get_data_volume(network, child_shape)
+
+            relevancy = self.info.tensor_to_relevancy[network.tensor][self.node.rank_variable]
+
+            # The fanout in this dimension in mapping nodes below, i.e., the stride
+            last_fanout = child_result.fanout.get((self.node.component, self.einsum_name), {})
+            last_fanout = last_fanout.get(self.node.name, 1)
+
+            topology_model = self._get_topology_model(
+                network, flattened_arch[network.component].topology
+            )
+            per_loop_transfer_cost = topology_model.per_loop_transfer_cost(
+                relevancy,
+                shape_repeats=shape_repeats,
+                last_fanout=last_fanout,
+                volume=volume,
+                src_component=src_component,
+                dim_name=self.node.name,
+            )
+
+            overall_max_hops = topology_model.accumulate_max_hops(
+                network, per_loop_transfer_cost.max_hops
+            )
+
             accumulated_network_stats.total_hops += (
-                child_network_stats.total_hops * shape_repeats
+                per_loop_transfer_cost.total_cost
+                + child_network_stats.total_hops * shape_repeats
             )
             accumulated_network_stats.max_hops = MaxGeqZero(
                 accumulated_network_stats.max_hops,
-                child_network_stats.max_hops,
+                overall_max_hops + child_network_stats.max_hops,
             )
-            projection = info.einsum_tensor_to_projection[(einsum_name, network.tensor)]
-            component_object = info.job.flattened_arch[network.component]
-            workload_bpv = info.job.einsum.tensor_accesses[
-                network.tensor
-            ].bits_per_value
-            bits_per_value = component_object.bits_per_value.get(
-                network.tensor, workload_bpv
-            )
-            bits_per_action = component_object.bits_per_action
-            if bits_per_action is not None:
-                actions_per_value = bits_per_value / bits_per_action
-            else:
-                actions_per_value = bits_per_value
-            volume = (
-                compute_dense_tile_occupancy(projection, child_shape)
-                * actions_per_value
+            accumulated_network_stats.max_traffic[self.node.name] = MaxGeqZero(
+                accumulated_network_stats.max_traffic.get(self.node.name, 0),
+                per_loop_transfer_cost.max_traffic + child_network_stats.max_traffic.get(self.node.name, 0)
             )
 
-            if info.job.spec_one_einsum.arch.is_above(
-                node.component, network.component
-            ):
-                continue
+        overall_max_hops = {}
+        for model in self.topology_models.values():
+            overall_max_hops.update(model.overall_max_hops)
+        return overall_max_hops
 
-            relevancy = info.tensor_to_relevancy[network.tensor][node.rank_variable]
+    def _get_data_volume(self, network, child_shape):  # dense tile occupancy × actions per value
+        info = self.info
+        einsum_name = self.einsum_name
+        flattened_arch = info.job.flattened_arch
+        projection = info.einsum_tensor_to_projection[(einsum_name, network.tensor)]
+        component_object = flattened_arch[network.component]
+        workload_bpv = info.job.einsum.tensor_accesses[
+            network.tensor
+        ].bits_per_value
+        bits_per_value = component_object.bits_per_value.get(
+            network.tensor, workload_bpv
+        )
+        bits_per_action = component_object.bits_per_action
+        if bits_per_action is not None:
+            actions_per_value = bits_per_value / bits_per_action
+        else:
+            actions_per_value = bits_per_value
+        volume = (
+            compute_dense_tile_occupancy(projection, child_shape)
+            * actions_per_value
+        )
+        return volume
 
-            last_fanout = child_result.fanout.get((node.component, einsum_name), {})
-            last_fanout = last_fanout.get(node.name, 1)
-            if isinstance(relevancy, Irrelevant):
-                # Cost of multicasting is the cost of delivering along the dimension
-                multicast_hops = shape_repeats * last_fanout
-                multicast_cost = multicast_hops * volume
-                self.overall_max_hops += multicast_hops
 
-                accumulated_network_stats.total_hops += multicast_cost
-                accumulated_network_stats.max_hops = MaxGeqZero(
-                    accumulated_network_stats.max_hops,
-                    self.overall_max_hops + child_network_stats.max_hops,
-                )
-            elif isinstance(relevancy, Relevant):
-                # Cost of unicast is the cost of delivering to each point in
-                # the dimension with shape as stride
-                # TODO: we should use the actual stride
-                total_unicast_cost = (
-                    0.5 * (shape_repeats + 1) * shape_repeats * last_fanout * volume
-                )
-                max_unicast_hops = shape_repeats * last_fanout
-                self.overall_max_hops += max_unicast_hops
+def multicast_cost(n_dsts, stride):
+    """Returns total hops of one multicast along a dimension: ``(n_dsts - 1) * stride``."""
+    return (n_dsts-1)*stride
+
+
+def unicast_cost(n_dsts, stride):
+    """Total hops to unicast to ``n_dsts`` points spaced ``stride`` apart along a dimension."""
+    # Sum of the per-destination distances: (1 + 2 + ... + (n_dsts - 1)) * stride.
+    farthest_index = n_dsts - 1
+    return arithmetic_sum(farthest_index) * stride
 
-                accumulated_network_stats.total_hops += total_unicast_cost
-                accumulated_network_stats.max_hops = MaxGeqZero(
-                    accumulated_network_stats.max_hops,
-                    self.overall_max_hops + child_network_stats.max_hops,
-                )
-            elif isinstance(relevancy, PartiallyRelevant):
-                raise NotImplementedError()
-            else:
-                raise RuntimeError(f"unhandled relevancy type {relevancy}")
 
-        return self.overall_max_hops
+def arithmetic_sum(n):  # closed form of 1 + 2 + ... + n; a float for numeric n because of the 0.5 factor
+    return 0.5 * (n+1) * n
diff --git a/accelforge/model/_looptree/reuse/symbolic/_stats.py b/accelforge/model/_looptree/reuse/symbolic/_stats.py
index 8368937d..aa2c1d90 100644
--- a/accelforge/model/_looptree/reuse/symbolic/_stats.py
+++ b/accelforge/model/_looptree/reuse/symbolic/_stats.py
@@ -21,7 +21,11 @@
 @dataclass
 class NetworkStats:
     total_hops: Any = field(default=0)
+    """Total number of hops overall. Useful to calculate energy."""
     max_hops: Any = field(default=0)
+    """Longest hops among all routes."""
+    max_traffic: dict[int | str, Any] = field(default_factory=dict)
+    """Maximum traffic occurring on any single link along a dimension."""
 
     def repeat(self, n_repeats):
         new = copy.copy(self)
@@ -32,10 +36,6 @@ def repeat(self, n_repeats):
         new.total_hops = new.total_hops * n_repeats
         return new
 
-    def combine(self, other: "NetworkStats"):
-        self.total_hops += other.total_hops
-        self.max_hops = max(self.max_hops, other.max_hops)
-
 
 @dataclass
 class BuffetStats:
@@ -100,6 +100,12 @@ def repeat_temporal(self, factor: int, is_fully_relevant: bool) -> "BuffetStats"
         return new
 
     def repeat_spatial(self, factor: int, reuse_parent_accesses: bool) -> "BuffetStats":
+        """
+        Repeat buffet stats due to spatial loop `factor` number of times.
+
+        For accesses to parent, the amount of repetition is `factor` if `reuse_parent_accesses`
+        is False; otherwise, there is no repetition.
+        """
         new = copy.copy(self)
         if factor == 1:
             return new
diff --git a/accelforge/model/_looptree/reuse/symbolic/_symbolic.py b/accelforge/model/_looptree/reuse/symbolic/_symbolic.py
index abc9238f..e8d628e7 100755
--- a/accelforge/model/_looptree/reuse/symbolic/_symbolic.py
+++ b/accelforge/model/_looptree/reuse/symbolic/_symbolic.py
@@ -585,13 +585,16 @@ def analyze_spatial(node_idx, current_shape, info: AnalysisInfo):
     node: Spatial = mapping[node_idx]
     rank_var = node.rank_variable
     node_dim = node.name
-    spatial_component = info.job.flattened_arch[node.component]
+    flattened_arch = info.job.flattened_arch
+    spatial_component = flattened_arch[node.component]
     component_spatial_dim = spatial_component.spatial[node_dim]
     stride_and_shape = loop_stride_and_shape(node, current_shape, node_idx, info)
 
     result_accumulator = SymbolicAnalysisOutput()
 
-    network_analyzer = NetworkAnalyzer(result_accumulator.network_stats)
+    network_analyzer = NetworkAnalyzer(
+        result_accumulator.network_stats, info, einsum_name, node
+    )
 
     def handle_repeated_value(repeated_shape):
         shape_value = repeated_shape.value
@@ -605,7 +608,6 @@ def handle_repeated_value(repeated_shape):
         accumulated_buffet_stats = result_accumulator.buffet_stats
         child_stats = list(child_result.buffet_stats.items())
         for i, (buffet, buffet_stats) in enumerate(child_stats):
-            stats = buffet_stats
             accumulated_stats = accumulated_buffet_stats.setdefault(
                 buffet, BuffetStats.blank()
             )
@@ -627,13 +629,13 @@ def handle_repeated_value(repeated_shape):
                 and buffet.tensor in component_spatial_dim.may_reuse
             )
 
-            stats.n_loops_above = stats.n_loops_above + 1
-            accumulated_stats += stats.repeat_spatial(
+            buffet_stats.n_loops_above = buffet_stats.n_loops_above + 1
+            accumulated_stats += buffet_stats.repeat_spatial(
                 shape_repeats, reuse_parent_accesses
             )
 
         network_analyzer.accumulate_child_result(
-            child_result, info, shape_repeats, einsum_name, child_shape, node
+            child_result, shape_repeats, child_shape
         )
 
         for einsum, child_steps in child_result.temporal_steps.items():
@@ -691,6 +693,7 @@ def analyze_storage(
     count_writes: bool = True,
 ):
     mapping = info.mapping
+    flattened_arch = info.job.flattened_arch
     einsum_name = mapping[-1].einsum
     node: TensorHolder = mapping[node_idx]
 
@@ -797,25 +800,49 @@ def inherit_add(attr: str, default_value: Any = fills) -> Any:
         else:
             write_scale = 0
 
+        # =======================
+        # For distributed buffers
+        n_active_physical_units = 1
+        if child is not None:
+            next_spatial = flattened_arch.first_below(
+                node.component,
+                lambda n: isinstance(n, arch.Spatialable) and len(n.spatial) > 0,
+                default=None,
+            )
+            if component_object._is_distributed() and next_spatial is not None:
+                for (b, e), dim_fanout in child_result.fanout.items():
+                    if b != next_spatial.name:
+                        continue
+                    for d in dim_fanout:
+                        if not component_object._has_physical_dim(d):
+                            continue
+                        n_active_physical_units *= (
+                            dim_fanout[d] / component_object._get_physical_stride_along(d)
+                        )
+
+        # ==========================
+        # Recalculate usage of distributed buffers
+        stats.max_occupancy /= n_active_physical_units
+
         # ==========================
         # Data exchanges with parent
         if count_downward_movement[tensor]:  # Parent -> Me
             stats.total_write_actions += stats.total_reads_to_parent * write_scale
             stats.max_per_unit_write_actions += (
-                stats.total_reads_to_parent * write_scale
+                stats.total_reads_to_parent * write_scale / n_active_physical_units
             )
             stats.total_skipped_first_write_actions += (
                 stats.total_skipped_first_reads_to_parent * write_scale
             )
             stats.min_per_unit_skipped_first_write_actions += (
-                stats.min_per_parent_skipped_first_reads_to_parent * write_scale
+                stats.min_per_parent_skipped_first_reads_to_parent * write_scale / n_active_physical_units
             )
 
         if count_upward_movement[tensor]:  # Me -> Parent
             # Comment this to have the final writeback to a buffer hit both that buffer and
             # go directly to the parent without incurring another read from the buffer.
             stats.total_read_actions += stats.total_writes_to_parent * read_scale
-            stats.max_per_unit_read_actions += stats.total_writes_to_parent * read_scale
+            stats.max_per_unit_read_actions += stats.total_writes_to_parent * read_scale / n_active_physical_units
 
         # ========================
         # Data exchanges with peer
@@ -828,7 +855,7 @@ def inherit_add(attr: str, default_value: Any = fills) -> Any:
             if count_downward_movement[tensor]:  # Me -> Child
                 stats.total_read_actions += child.total_reads_to_parent * read_scale
                 stats.max_per_unit_read_actions += (
-                    child.max_per_parent_reads_to_parent * read_scale
+                    child.max_per_parent_reads_to_parent * read_scale / n_active_physical_units
                 )
                 # Skip first read
                 if skip_initial:
@@ -836,13 +863,13 @@ def inherit_add(attr: str, default_value: Any = fills) -> Any:
                         child.total_skipped_first_reads_to_parent * read_scale
                     )
                     stats.min_per_unit_skipped_first_read_actions += (
-                        child.min_per_parent_skipped_first_reads_to_parent * read_scale
+                        child.min_per_parent_skipped_first_reads_to_parent * read_scale / n_active_physical_units
                     )
 
             if count_upward_movement[tensor]:  # Child -> Me
                 stats.total_write_actions += child.total_writes_to_parent * write_scale
                 stats.max_per_unit_write_actions += (
-                    child.max_per_parent_writes_to_parent * write_scale
+                    child.max_per_parent_writes_to_parent * write_scale / n_active_physical_units
                 )
 
     return child_result
@@ -902,19 +929,21 @@ def analyze_reservation(node_idx, current_shape, info: AnalysisInfo):
     child_result.buffet_stats[buffet] = stats
 
     # Reservation nodes are the first to produce stats for a network
-    network_node = info.job.spec_one_einsum.arch.find_first_of_type_above(
-        NetworkSpec, buffet.level, default=None
-    )
-    if network_node is not None:
-        network = Network(
-            tensor,
-            einsum_name,
-            info.data_movement_connections.get_src(buffet),
-            buffet,
-            component=network_node.name if network_node else network_node,
+    src = info.data_movement_connections.get_src(buffet)
+    if src is not None:
+        network_node = info.job.flattened_arch.find_first_of_type_between(
+            NetworkSpec, buffet.level, src.level, default=None
         )
-        assert network not in child_result.network_stats
-        child_result.network_stats[network] = NetworkStats()
+        if network_node is not None:
+            network = Network(
+                tensor,
+                einsum_name,
+                src,
+                buffet,
+                component=network_node.name if network_node else network_node,
+            )
+            assert network not in child_result.network_stats
+            child_result.network_stats[network] = NetworkStats()
 
     fanout_key = (node.resource, einsum_name)
     if fanout_key not in child_result.fanout:
@@ -964,18 +993,20 @@ def analyze_compute(
         stats.max_occupancy = 1
         result_accumulator.buffet_stats[buffet] = stats
 
-        network_node = info.job.spec_one_einsum.arch.find_first_of_type_above(
-            NetworkSpec, node.component, default=None
-        )
-        if network_node is not None:
-            network = Network(
-                tensor,
-                info.job.einsum_name,
-                info.data_movement_connections.get_src(buffet),
-                buffet,
-                component=network_node.name if network_node else network_node,
+        src = info.data_movement_connections.get_src(buffet)
+        if src is not None:
+            network_node = info.job.flattened_arch.find_first_of_type_between(
+                NetworkSpec, node.component, src.level, default=None
             )
-            result_accumulator.network_stats[network] = NetworkStats()
+            if network_node is not None:
+                network = Network(
+                    tensor,
+                    info.job.einsum_name,
+                    src,
+                    buffet,
+                    component=network_node.name if network_node else network_node,
+                )
+                result_accumulator.network_stats[network] = NetworkStats()
 
     return result_accumulator
 
diff --git a/tests/input_files/networked/flat.yaml b/tests/network/input_files/networked/flat.yaml
similarity index 61%
rename from tests/input_files/networked/flat.yaml
rename to tests/network/input_files/networked/flat.yaml
index 2c1b2cb0..28679d21 100644
--- a/tests/input_files/networked/flat.yaml
+++ b/tests/network/input_files/networked/flat.yaml
@@ -10,12 +10,6 @@ arch:
     - {name: read, energy: 0, throughput: inf}
     - {name: write, energy: 0, throughput: inf}
 
-  - !Network
-    name: NoC
-    area: 0
-    leak_power: 0
-    actions: []
-
   - !Array
     name: Array
     spatial:
@@ -37,10 +31,10 @@ arch:
       size: inf
       area: 0
       leak_power: 0
-      tensors: {keep: ~MainMemory, may_keep: All}
+      tensors: {keep: input, may_keep: input}
       actions:
-      - {name: read, energy: 0, throughput: inf}
-      - {name: write, energy: 0, throughput: inf}
+      - {name: read, energy: 5, throughput: 1}
+      - {name: write, energy: 5, throughput: inf}
       spatial:
       - {name: X, fanout: 4}
 
@@ -49,10 +43,10 @@ arch:
       size: inf
       area: 0
       leak_power: 0
-      tensors: {keep: ~MainMemory, may_keep: All}
+      tensors: {keep: output, may_keep: output}
       actions:
-      - {name: read, energy: 0, throughput: inf}
-      - {name: write, energy: 0, throughput: inf}
+      - {name: read, energy: 5, throughput: inf}
+      - {name: write, energy: 5, throughput: inf}
       spatial:
       - {name: Y, fanout: 4}
 
@@ -61,26 +55,34 @@ arch:
       size: inf
       area: 0
       leak_power: 0
-      tensors: {keep: ~MainMemory, may_keep: All}
+      tensors: {keep: weight, may_keep: weight}
       actions:
-      - {name: read, energy: 0, throughput: inf}
-      - {name: write, energy: 0, throughput: inf}
+      - {name: read, energy: 5, throughput: 1}
+      - {name: write, energy: 5, throughput: 1}
       spatial:
       - {name: X, fanout: 2}
       - {name: Y, fanout: 2}
 
+    - !Network
+      name: NoC
+      area: 0
+      leak_power: 0
+      actions:
+      - {name: hops, energy: 1, latency: 0, throughput: inf}
+
   - !Memory
     name: Scratchpad
     size: inf
     area: 0
     leak_power: 0
+    tensors: {keep: weight, may_keep: weight}
     actions:
-    - {name: read, energy: 0, throughput: inf}
-    - {name: write, energy: 0, throughput: inf}
+    - {name: read, energy: 1, throughput: inf}
+    - {name: write, energy: 1, throughput: inf}
 
   - !Compute
     name: MAC
     area: 0
     leak_power: 0
     actions:
-    - {name: compute, energy: 0, throughput: inf}
\ No newline at end of file
+    - {name: compute, energy: 1, throughput: inf}
diff --git a/tests/network/input_files/networked/hierarchical.yaml b/tests/network/input_files/networked/hierarchical.yaml
new file mode 100644
index 00000000..f268ef7e
--- /dev/null
+++ b/tests/network/input_files/networked/hierarchical.yaml
@@ -0,0 +1,58 @@
+arch:
+  nodes:
+  - !Memory
+    name: MainMemory
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: All}
+    actions:
+    - {name: read, energy: 0, throughput: 1e9}
+    - {name: write, energy: 0, throughput: 1e9}
+
+  - !Memory
+    name: GlobalBuffer
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: ~MainMemory, may_keep: All}
+    actions:
+    - {name: read, energy: 0, throughput: 4e9}
+    - {name: write, energy: 0, throughput: 4e9}
+
+  - !Network
+    name: PeArray
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: hops, energy: 1, latency: 0, throughput: 4e9}
+
+  - !Memory
+    name: Scratchpad
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: All}
+    actions:
+    - {name: read, energy: 0, throughput: 16e9}
+    - {name: write, energy: 0, throughput: 16e9}
+    spatial:
+    - {name: X, fanout: 2}
+    - {name: Y, fanout: 2}
+
+  - !Network
+    name: MacArray
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: hops, energy: 1, latency: 0, throughput: 16e9}
+
+  - !Compute
+    name: MAC
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: compute, energy: 0, throughput: 1e9}
+    spatial:
+    - {name: X, fanout: 2}
+    - {name: Y, fanout: 2}
\ No newline at end of file
diff --git a/tests/input_files/networked/hierarchical_1d.yaml b/tests/network/input_files/networked/hierarchical_1d.yaml
similarity index 87%
rename from tests/input_files/networked/hierarchical_1d.yaml
rename to tests/network/input_files/networked/hierarchical_1d.yaml
index 15af7af1..167212ff 100644
--- a/tests/input_files/networked/hierarchical_1d.yaml
+++ b/tests/network/input_files/networked/hierarchical_1d.yaml
@@ -24,8 +24,9 @@ arch:
     name: PeArray
     area: 0
     leak_power: 0
+    total_latency: "max_hops"
     actions:
-    - {name: hops, energy: 1, throughput: inf}
+    - {name: hops, energy: 1, latency: 0, throughput: 1}
 
   - !Memory
     name: Scratchpad
@@ -44,7 +45,7 @@ arch:
     area: 0
     leak_power: 0
     actions:
-    - {name: hops, energy: 1, throughput: inf}
+    - {name: hops, energy: 1, latency: 1, throughput: inf}
 
   - !Compute
     name: MAC
diff --git a/tests/input_files/networked/hierarchical.yaml b/tests/network/input_files/networked/hierarchical_1d_all_to_all.yaml
similarity index 76%
rename from tests/input_files/networked/hierarchical.yaml
rename to tests/network/input_files/networked/hierarchical_1d_all_to_all.yaml
index 61d7cf70..bbb14f8c 100644
--- a/tests/input_files/networked/hierarchical.yaml
+++ b/tests/network/input_files/networked/hierarchical_1d_all_to_all.yaml
@@ -24,8 +24,9 @@ arch:
     name: PeArray
     area: 0
     leak_power: 0
+    total_latency: "max_hops"
     actions:
-    - {name: hops, energy: 1, throughput: inf}
+    - {name: hops, energy: 1, latency: 1, throughput: inf}
 
   - !Memory
     name: Scratchpad
@@ -37,15 +38,16 @@ arch:
     - {name: read, energy: 0, throughput: inf}
     - {name: write, energy: 0, throughput: inf}
     spatial:
-    - {name: X, fanout: 2}
-    - {name: Y, fanout: 2}
+    - {name: X, fanout: 4}
 
+  # All-to-all switch (NVLink-like): every node is one hop from every other
   - !Network
     name: MacArray
+    topology: all_to_all
     area: 0
     leak_power: 0
     actions:
-    - {name: hops, energy: 1, throughput: inf}
+    - {name: hops, energy: 1, latency: 1, throughput: inf}
 
   - !Compute
     name: MAC
@@ -54,5 +56,4 @@ arch:
     actions:
     - {name: compute, energy: 0, throughput: inf}
     spatial:
-    - {name: X, fanout: 2}
-    - {name: Y, fanout: 2}
\ No newline at end of file
+    - {name: X, fanout: 4}
diff --git a/tests/network/input_files/networked/hierarchical_switched.yaml b/tests/network/input_files/networked/hierarchical_switched.yaml
new file mode 100644
index 00000000..0bfd6592
--- /dev/null
+++ b/tests/network/input_files/networked/hierarchical_switched.yaml
@@ -0,0 +1,58 @@
+arch:
+  nodes:
+  - !Memory
+    name: MainMemory
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: All}
+    actions:
+    - {name: read, energy: 100, latency: 1e-9}
+    - {name: write, energy: 100, latency: 1e-9}
+
+  - !Memory
+    name: GlobalBuffer
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: ~MainMemory, may_keep: All}
+    actions:
+    - {name: read, energy: 10, latency: 1e-9/4}
+    - {name: write, energy: 10, latency: 1e-9/4}
+
+  - !Network
+    name: PeArray
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: hops, energy: 5, latency: 1e-9/4}
+
+  - !Memory
+    name: Scratchpad
+    size: inf
+    area: 0
+    leak_power: 0
+    tensors: {keep: All}
+    actions:
+    - {name: read, energy: 2, latency: 1e-9/16}
+    - {name: write, energy: 2, latency: 1e-9/16}
+    spatial:
+    - {name: X, fanout: 2}
+    - {name: Y, fanout: 2}
+
+  - !Network
+    name: MacArray
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: hops, energy: 1, latency: 1e-9/16}
+
+  - !Compute
+    name: MAC
+    area: 0
+    leak_power: 0
+    actions:
+    - {name: compute, energy: 1, latency: 1e-9}
+    spatial:
+    - {name: X, fanout: 2}
+    - {name: Y, fanout: 2}
\ No newline at end of file
diff --git a/tests/network/input_files/networked/one_matmul_to_flat.yaml b/tests/network/input_files/networked/one_matmul_to_flat.yaml
new file mode 100644
index 00000000..cf7d2f17
--- /dev/null
+++ b/tests/network/input_files/networked/one_matmul_to_flat.yaml
@@ -0,0 +1,42 @@
+mapping:
+  nodes:
+  - !Storage
+    component: MainMemory
+    tensors: [T0, T1, W0]
+  - !Storage
+    component: DistributedBuffer
+    tensors: [W0]
+  - !Temporal
+    rank_variable: m
+    tile_shape: {{ M_TILE }}
+  - !Storage
+    component: RowBuffer
+    tensors: [T0]
+  - !Storage
+    component: ColumnBuffer
+    tensors: [T1]
+  - !Spatial
+    rank_variable: n0
+    tile_shape: {{ MAC_TILE }}
+    component: Array
+    name: X
+  - !Spatial
+    rank_variable: n1
+    tile_shape: {{ MAC_TILE }}
+    component: Array
+    name: Y
+  - !Storage
+    component: Scratchpad
+    tensors: [T0, T1, W0]
+  - !Temporal
+    rank_variable: m
+    tile_shape: 1
+  - !Temporal
+    rank_variable: n0
+    tile_shape: 1
+  - !Temporal
+    rank_variable: n1
+    tile_shape: 1
+  - !Compute
+    einsum: Matmul0
+    component: MAC
\ No newline at end of file
diff --git a/tests/input_files/networked/one_matmul_to_networked_hierarchical.yaml b/tests/network/input_files/networked/one_matmul_to_networked_hierarchical.yaml
similarity index 100%
rename from tests/input_files/networked/one_matmul_to_networked_hierarchical.yaml
rename to tests/network/input_files/networked/one_matmul_to_networked_hierarchical.yaml
diff --git a/tests/input_files/networked/one_matmul_to_networked_hierarchical_1d.yaml b/tests/network/input_files/networked/one_matmul_to_networked_hierarchical_1d.yaml
similarity index 100%
rename from tests/input_files/networked/one_matmul_to_networked_hierarchical_1d.yaml
rename to tests/network/input_files/networked/one_matmul_to_networked_hierarchical_1d.yaml
diff --git a/tests/network/test_network.py b/tests/network/test_network.py
new file mode 100644
index 00000000..04e6e6ba
--- /dev/null
+++ b/tests/network/test_network.py
@@ -0,0 +1,451 @@
+from pathlib import Path
+from unittest import TestCase
+
+import accelforge as af
+
+INPUT_FILES_DIR = Path(__file__).parent / "input_files" / "networked"
+
+
+class TestParsing(TestCase):  # smoke tests: the arch fixtures load and their component costs evaluate
+    def test_hierarchical(self):
+        spec = af.Spec.from_yaml(
+            INPUT_FILES_DIR / "hierarchical.yaml",
+        )
+        self.assertIn("PeArray", spec.arch.nodes)
+        self.assertEqual(spec.arch.nodes["PeArray"].get_fanout(), 1)  # PeArray has no `spatial` entry in the fixture
+        self.assertIn("Scratchpad", spec.arch.nodes)
+        self.assertEqual(spec.arch.nodes["Scratchpad"].get_fanout(), 4)  # fixture gives Scratchpad X and Y fanouts of 2 each
+        self.assertIn("MacArray", spec.arch.nodes)
+        self.assertEqual(spec.arch.nodes["MacArray"].get_fanout(), 1)  # MacArray has no `spatial` entry in the fixture
+
+        try:
+            spec = spec.calculate_component_costs()
+        except af.EvaluationError as e:
+            self.fail(e.message)  # report an evaluation error as a test failure with its message
+
+    def test_flat(self):
+        spec = af.Spec.from_yaml(
+            INPUT_FILES_DIR / "flat.yaml",
+        )
+
+        try:
+            spec = spec.calculate_component_costs()
+        except af.EvaluationError as e:
+            self.fail(e.message)  # report an evaluation error as a test failure with its message
+
+
+class TestModelMesh(TestCase):  # hop-count, action-count and latency expectations for the networked fixtures
+    def test_hierarchical_1d(self):
+        M = 8
+        KN = 8
+        MAC_TILE = 2
+        M_TILE = 4
+        BITS_PER_VALUE = 8
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            # af.examples.arches.networked.hierarchical,
+            INPUT_FILES_DIR / "hierarchical_1d.yaml",
+            # af.examples.mappings.one_matmul_to_networked_hierarchical,
+            INPUT_FILES_DIR / "one_matmul_to_networked_hierarchical_1d.yaml",
+            jinja_parse_data={
+                "N_EINSUMS": 1,
+                "M": M,  # pass the constants so the spec cannot drift from the expected values
+                "KN": KN,
+                "MAC_TILE": MAC_TILE,
+                "M_TILE": M_TILE,
+            },
+        )
+        result = spec.evaluate_mapping()
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE)  # number of used Scratchpad
+            * M_TILE
+            * KN  # temporal for n1 in mapping
+            * sum(i for i in range(MAC_TILE))  # unicast along X-axis of MacArray
+            * BITS_PER_VALUE,
+        )
+        # NOTE: assuming XY routing (as defined in mapping)
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T1<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE)
+            * M_TILE
+            * KN  # temporal for n1 in mapping
+            * (MAC_TILE - 1)  # multicast along X-axis of MacArray
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>W0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE)
+            * M_TILE
+            * KN
+            * sum(i for i in range(MAC_TILE))
+            * BITS_PER_VALUE,
+        )
+
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * sum(i for i in range(KN // MAC_TILE))  # unicast along X-axis of PeArray
+            * M_TILE
+            * MAC_TILE
+            * BITS_PER_VALUE,
+        )
+        # NOTE: assuming XY routing (as defined in mapping)
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T1<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN // MAC_TILE - 1)  # multicast along X-axis of PeArray
+            * M_TILE
+            * KN
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>W0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * sum(i for i in range(KN // MAC_TILE))  # unicast along PeArray
+            * MAC_TILE
+            * KN
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Total<SEP>latency"].iloc[0],
+            4
+        )
+
+    def test_hierarchical(self):
+        M = 8
+        KN = 8
+        MAC_TILE = 2
+        PE_TILE = KN // MAC_TILE
+        M_TILE = 4
+        BITS_PER_VALUE = 8
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            # af.examples.arches.networked.hierarchical,
+            INPUT_FILES_DIR / "hierarchical.yaml",
+            # af.examples.mappings.one_matmul_to_networked_hierarchical,
+            INPUT_FILES_DIR / "one_matmul_to_networked_hierarchical.yaml",
+            jinja_parse_data={
+                "N_EINSUMS": 1,
+                "M": M,  # pass the constants so the spec cannot drift from the expected values
+                "KN": KN,
+                "MAC_TILE": MAC_TILE,
+                "M_TILE": M_TILE,
+            },
+        )
+        result = spec.evaluate_mapping()
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE) ** 2
+            * M_TILE
+            * (
+                sum(i for i in range(MAC_TILE))  # unicasting along X
+                +
+                MAC_TILE * (MAC_TILE-1)  # multicast along Y for each column
+            )
+            * BITS_PER_VALUE,
+        )
+        # NOTE: assuming XY routing (as defined in mapping)
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T1<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE) ** 2
+            * M_TILE
+            * (
+                MAC_TILE * (MAC_TILE - 1)  # multicast along X (the tile is shape N1, which is MAC_TILE here)
+                +
+                MAC_TILE * sum(i for i in range(MAC_TILE))  # unicasting along Y for each row
+            )
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>W0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN / MAC_TILE) ** 2
+            * M_TILE
+            * (
+                MAC_TILE * sum(i for i in range(MAC_TILE))  # unicast along X (the tile is shape N1, which is MAC_TILE here)
+                +
+                MAC_TILE * sum(i for i in range(MAC_TILE))  # unicasting along Y for each row
+            )
+            * BITS_PER_VALUE,
+        )
+
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (
+                sum(i for i in range(PE_TILE))
+                +
+                PE_TILE * (PE_TILE - 1)
+            )
+            # tile shape
+            * M_TILE * MAC_TILE * BITS_PER_VALUE,
+        )
+        # NOTE: assuming XY routing (as defined in mapping)
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T1<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (
+                PE_TILE * (PE_TILE - 1)
+                +
+                PE_TILE * sum(i for i in range(PE_TILE))
+            )
+            * M_TILE
+            * MAC_TILE
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>W0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (
+                PE_TILE * sum(i for i in range(PE_TILE))
+                +
+                PE_TILE * sum(i for i in range(PE_TILE))
+            )
+            * MAC_TILE**2
+            * BITS_PER_VALUE,
+        )
+
+    def test_flat(self):
+        M = 8
+        KN = 8
+        MAC_TILE = 2
+        M_TILE = 4
+        BITS_PER_VALUE = 8
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            INPUT_FILES_DIR / "flat.yaml",
+            INPUT_FILES_DIR / "one_matmul_to_flat.yaml",
+            jinja_parse_data={
+                "N_EINSUMS": 1,
+                "M": M,  # pass the constants so the spec cannot drift from the expected values
+                "KN": KN,
+                "MAC_TILE": MAC_TILE,
+                "M_TILE": M_TILE,
+            },
+        )
+        result = spec.evaluate_mapping()
+        self.assertEqual(
+            result.data['Matmul0<SEP>action<SEP>NoC<SEP>T0<SEP>hops'].iloc[0],
+            (
+                M / M_TILE
+                *
+                (KN / MAC_TILE) * (KN / MAC_TILE - 1)   # num rows * multicast_hops
+                *
+                M_TILE * MAC_TILE  # tile shape
+                *
+                BITS_PER_VALUE
+            )
+        )
+        self.assertEqual(
+            result.data['Matmul0<SEP>action<SEP>NoC<SEP>T1<SEP>hops'].iloc[0],
+            (
+                M / M_TILE
+                *
+                (KN / MAC_TILE) * (KN / MAC_TILE - 1)   # num rows * multicast_hops
+                *
+                M_TILE * MAC_TILE  # tile shape
+                *
+                BITS_PER_VALUE
+            )
+        )
+        self.assertEqual(
+            result.data['Matmul0<SEP>action<SEP>NoC<SEP>W0<SEP>hops'].iloc[0],
+            (
+                M / M_TILE
+                *
+                (
+                    4   # a 2x2 grid of physical buffers
+                    *
+                    (
+                        sum(i for i in range(2)) * MAC_TILE  # unicast along row * tile shape
+                        +
+                        2 * sum(i for i in range(2))  # num cols * unicast down col
+                    )
+                )
+                *
+                MAC_TILE * MAC_TILE  # tile shape
+                *
+                BITS_PER_VALUE
+            )
+        )
+        self.assertEqual(
+            result.data['Matmul0<SEP>action<SEP>RowBuffer<SEP>T0<SEP>read'].iloc[0],
+            (
+                M / M_TILE
+                *
+                (KN // MAC_TILE)  # parenthesized: `*` and `//` share precedence and bind left to right
+                *
+                M_TILE * MAC_TILE
+                *
+                BITS_PER_VALUE
+            )
+        )
+        self.assertEqual(
+            result.data['Matmul0<SEP>latency<SEP>RowBuffer'].iloc[0],
+            (
+                M / M_TILE
+                *
+                (KN // MAC_TILE)  # parenthesized so the floor division applies to this factor only
+                *
+                M_TILE * MAC_TILE
+                *
+                BITS_PER_VALUE
+                /
+                4    # num of physical RowBuffer
+            )
+        )
+        self.assertEqual(
+            result.data['Matmul0<SEP>latency<SEP>DistributedBuffer'].iloc[0],
+            (   # Reads from child
+                M / M_TILE
+                *
+                (KN // MAC_TILE)  # parenthesized so the floor division applies to this factor only
+                *
+                (KN // MAC_TILE)
+                *
+                MAC_TILE * MAC_TILE  # tile shape
+                *
+                BITS_PER_VALUE
+                /
+                4    # num of physical DistributedBuffer
+            )
+            +
+            (   # Writes from parent
+                (KN // MAC_TILE)  # parenthesized so the floor division applies to this factor only
+                *
+                (KN // MAC_TILE)
+                *
+                MAC_TILE * MAC_TILE  # tile shape
+                *
+                BITS_PER_VALUE
+                /
+                4    # num of physical DistributedBuffer
+            )
+        )
+
+
+class TestModelAllToAll(TestCase):
+    """MacArray is an all-to-all switch (NVLink-like). PeArray is a mesh."""
+
+    def test_hierarchical_1d_all_to_all(self):  # checks per-tensor hop counts and latencies of one evaluated mapping
+        M = 8
+        KN = 8
+        MAC_TILE = 4
+        M_TILE = 4
+        BITS_PER_VALUE = 8  # not passed to the YAML templates; only used in the expected values below
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            INPUT_FILES_DIR / "hierarchical_1d_all_to_all.yaml",
+            INPUT_FILES_DIR / "one_matmul_to_networked_hierarchical_1d.yaml",
+            jinja_parse_data={
+                "N_EINSUMS": 1,
+                "M": M,
+                "KN": KN,
+                "MAC_TILE": MAC_TILE,
+                "M_TILE": M_TILE,
+            },
+        )
+        result = spec.evaluate_mapping()  # result.data is indexed below by "<SEP>"-joined column names
+
+        # --- MacArray: all-to-all switch ---------------------------------
+        # Every node is one hop away, so each of the (MAC_TILE - 1) destinations costs one hop.
+        all_to_all = (
+            (M / M_TILE)
+            * (KN / MAC_TILE)  # number of used Scratchpad
+            * M_TILE
+            * KN  # temporal for n1 in mapping
+            * (MAC_TILE - 1)  # one hop per destination, for every tensor
+            * BITS_PER_VALUE
+        )
+        for tensor in ("T0", "T1", "W0"):  # the same expected value is asserted for all three tensors
+            self.assertEqual(
+                result.data[
+                    f"Matmul0<SEP>action<SEP>MacArray<SEP>{tensor}<SEP>hops"
+                ].iloc[0],
+                all_to_all,
+                msg=f"unexpected MacArray hops for {tensor}",
+            )
+
+        # --- PeArray: still a mesh ---------------------------------------
+        # Unchanged from test_hierarchical_1d, so the mesh formulas hold (now
+        # with MAC_TILE = 4, i.e. KN // MAC_TILE = 2).
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * sum(i for i in range(KN // MAC_TILE))  # unicast along X of PeArray
+            * M_TILE
+            * MAC_TILE
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T1<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * (KN // MAC_TILE - 1)  # multicast along X of PeArray
+            * M_TILE
+            * KN
+            * BITS_PER_VALUE,
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>W0<SEP>hops"].iloc[0],
+            (M / M_TILE)
+            * sum(i for i in range(KN // MAC_TILE))  # unicast along PeArray
+            * MAC_TILE
+            * KN
+            * BITS_PER_VALUE,
+        )
+
+        # --- Latency ------------------------------------------------------
+        # The switch's uniform single-hop routing gives MacArray a constant
+        # latency of 1, versus the mesh PeArray's 2.
+        self.assertEqual(
+            result.data["Matmul0<SEP>latency<SEP>MacArray"].iloc[0], 1
+        )
+        self.assertEqual(
+            result.data["Matmul0<SEP>latency<SEP>PeArray"].iloc[0], 2
+        )
+        self.assertEqual(result.data["Total<SEP>latency"].iloc[0], 2)  # same value as the PeArray latency above
+
+
+class TestMapper(TestCase):  # smoke tests: each only requires that the mapper runs and returns a result
+    def test_hierarchical(self):
+        """Mapper runs on hierarchical.yaml (N_EINSUMS=1) and returns a result."""
+        M = KN = 8  # workload sizes passed through jinja_parse_data
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            INPUT_FILES_DIR / "hierarchical.yaml",
+            jinja_parse_data={"N_EINSUMS": 1, "M": M, "KN": KN}
+        )
+        self.assertIsNotNone(spec.map_workload_to_arch())  # mapping contents are not inspected
+
+    def test_flat(self):
+        """Mapper runs on flat.yaml (N_EINSUMS=1) and returns a result."""
+        M = KN = 8  # workload sizes passed through jinja_parse_data
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            INPUT_FILES_DIR / "flat.yaml",
+            jinja_parse_data={"N_EINSUMS": 1, "M": M, "KN": KN}
+        )
+        self.assertIsNotNone(spec.map_workload_to_arch())  # mapping contents are not inspected
+
+    def test_flat_one_row_buffer(self):
+        """Mapper runs on flat.yaml with N_ROW_BUFFER=1 and returns a result."""
+        M = KN = 8  # workload sizes passed through jinja_parse_data
+
+        spec = af.Spec.from_yaml(
+            af.examples.workloads.matmuls,
+            INPUT_FILES_DIR / "flat.yaml",
+            jinja_parse_data={"N_EINSUMS": 1, "M": M, "KN": KN, "N_ROW_BUFFER": 1}
+        )
+        self.assertIsNotNone(spec.map_workload_to_arch())  # mapping contents are not inspected
diff --git a/tests/network/test_topology_model.py b/tests/network/test_topology_model.py
new file mode 100644
index 00000000..36dcc31c
--- /dev/null
+++ b/tests/network/test_topology_model.py
@@ -0,0 +1,168 @@
+from unittest import TestCase
+
+from accelforge.frontend.arch.components import TopologySpec
+from accelforge.frontend._workload_isl._symbolic import (
+    Irrelevant,
+    PartiallyRelevant,
+    Relevant,
+)
+from accelforge.model._looptree.reuse.symbolic._network import (
+    AllToAllTopologyModel,
+    MeshTopologyModel,
+    get_topology_model,
+)
+
+
+class _NoDistribution:
+    """Stand-in source component that is not physically distributed."""
+
+    def _get_physical_fanout_along(self, dim_name, default=1):  # both arguments are ignored
+        return 1  # constant fanout of 1, whatever dimension is asked for
+
+
+class _Distributed:
+    """Stand-in source component physically distributed along a dimension."""
+
+    def __init__(self, fanout, stride):  # stored as-is and handed back by the two getters below
+        self.fanout = fanout
+        self.stride = stride
+
+    def _get_physical_fanout_along(self, dim_name, default=1):  # dim_name and default are ignored
+        return self.fanout
+
+    def _get_physical_stride_along(self, dim_name):  # dim_name is ignored
+        return self.stride
+
+
+class TestMeshTopologyModel(TestCase):
+    """Unit tests for the mesh cost model in isolation."""
+
+    def _cost(self, relevancy, *, n, stride, volume=10, src=None):  # helper: n -> shape_repeats, stride -> last_fanout
+        return MeshTopologyModel().per_loop_transfer_cost(
+            relevancy,
+            shape_repeats=n,
+            last_fanout=stride,
+            volume=volume,
+            src_component=src if src is not None else _NoDistribution(),  # default source: not distributed
+            dim_name="X",
+        )
+
+    def test_registry_resolves_model(self):  # lookup works with the enum member and with the string "mesh"
+        self.assertIsInstance(get_topology_model(TopologySpec.MESH), MeshTopologyModel)
+        self.assertIsInstance(get_topology_model("mesh"), MeshTopologyModel)
+
+    def test_multicast(self):
+        # Irrelevant: one value flows down the line, dropped at each of the
+        # (n - 1) downstream nodes. Each link carries it at most once.
+        n, stride, volume = 4, 2, 10
+        cost = self._cost(Irrelevant(), n=n, stride=stride, volume=volume)
+        self.assertEqual(cost.total_cost, (n - 1) * stride * volume)
+        self.assertEqual(cost.max_hops, n * stride)
+        self.assertEqual(cost.max_traffic, volume)
+
+    def test_unicast(self):
+        # Relevant (not distributed): each destination needs its own data
+        # delivered i*stride hops away, so the total is quadratic and the link
+        # nearest the source carries traffic for all (n - 1) downstream nodes.
+        n, stride, volume = 4, 2, 10
+        cost = self._cost(Relevant("n0"), n=n, stride=stride, volume=volume)
+        self.assertEqual(cost.total_cost, sum(range(n)) * stride * volume)
+        self.assertEqual(cost.max_hops, n * stride)
+        self.assertEqual(cost.max_traffic, (n - 1) * volume)
+
+    def test_unicast_distributed_binds_locally(self):
+        # When the source is physically distributed, data binds as locally as
+        # possible, reducing hops relative to the non-distributed unicast.
+        n, stride, volume = 4, 1, 10
+        src = _Distributed(fanout=2, stride=4)  # stand-in reporting physical fanout 2 and physical stride 4
+        cost = self._cost(Relevant("n0"), n=n, stride=stride, volume=volume, src=src)
+
+        # physical_stride / last_fanout = 4, capped at shape_repeats = 4
+        n_dsts_per_physical = 4
+        n_activated_physical = 1  # n*stride / physical_stride = 4/4
+        self.assertEqual(
+            cost.total_cost,
+            n_activated_physical * sum(range(n_dsts_per_physical)) * stride * volume,
+        )
+        self.assertEqual(cost.max_hops, (n_dsts_per_physical - 1) * stride)
+        self.assertEqual(cost.max_traffic, (n_dsts_per_physical - 1) * volume)
+
+    def test_partially_relevant_not_implemented(self):  # PartiallyRelevant must raise NotImplementedError
+        with self.assertRaises(NotImplementedError):
+            self._cost(PartiallyRelevant("n0"), n=4, stride=2)
+
+
+class TestAllToAllTopologyModel(TestCase):
+    """Unit tests for the all-to-all (switch) cost model in isolation."""
+
+    def _cost(self, relevancy, n, *, volume=10, last_fanout=99):  # helper: n is passed as shape_repeats
+        # last_fanout is deliberately large and arbitrary: an all-to-all switch
+        # must ignore physical stride entirely.
+        return AllToAllTopologyModel().per_loop_transfer_cost(
+            relevancy,
+            shape_repeats=n,
+            last_fanout=last_fanout,
+            volume=volume,
+            src_component=_NoDistribution(),  # source is always the non-distributed stand-in
+            dim_name="X",
+        )
+
+    def test_registry_resolves_model(self):
+        # Resolves both by enum and by the StrEnum value (the form that survives
+        # the arch evaluation pipeline).
+        self.assertIsInstance(
+            get_topology_model(TopologySpec.ALL_TO_ALL), AllToAllTopologyModel
+        )
+        self.assertIsInstance(get_topology_model("all_to_all"), AllToAllTopologyModel)
+
+    def test_multicast(self):
+        n, volume = 5, 10
+        cost = self._cost(Irrelevant(), n, volume=volume)
+        # Linear in destinations, one switch hop, shared link traffic.
+        self.assertEqual(cost.total_cost, (n - 1) * volume)
+        self.assertEqual(cost.max_hops, AllToAllTopologyModel.HOPS_PER_TRANSFER)
+        self.assertEqual(cost.max_traffic, volume)
+
+    def test_unicast(self):
+        n, volume = 5, 10
+        cost = self._cost(Relevant("n0"), n, volume=volume)
+        # Same (linear) total cost as multicast and constant hops, but the
+        # source's uplink to the switch carries every distinct message.
+        self.assertEqual(cost.total_cost, (n - 1) * volume)
+        self.assertEqual(cost.max_hops, AllToAllTopologyModel.HOPS_PER_TRANSFER)
+        self.assertEqual(cost.max_traffic, (n - 1) * volume)
+
+    def test_independent_of_stride(self):
+        # Stride (last_fanout) must not affect any component of the cost.
+        a = self._cost(Relevant("n0"), 5, last_fanout=1)
+        b = self._cost(Relevant("n0"), 5, last_fanout=1000)
+        self.assertEqual(
+            (a.total_cost, a.max_hops, a.max_traffic),
+            (b.total_cost, b.max_hops, b.max_traffic),
+        )
+
+    def test_linear_unlike_mesh_quadratic(self):
+        # Against an identical mesh scenario, all-to-all unicast is linear while
+        # the mesh is quadratic, and all-to-all hops are constant (< distance).
+        n, volume, stride = 6, 1, 1
+        kwargs = dict(  # identical arguments are fed to both models
+            shape_repeats=n,
+            last_fanout=stride,
+            volume=volume,
+            src_component=_NoDistribution(),
+            dim_name="X",
+        )
+        a2a = AllToAllTopologyModel().per_loop_transfer_cost(Relevant("n0"), **kwargs)
+        mesh = MeshTopologyModel().per_loop_transfer_cost(Relevant("n0"), **kwargs)
+
+        self.assertEqual(a2a.total_cost, (n - 1) * volume)
+        self.assertEqual(mesh.total_cost, sum(range(n)) * stride * volume)
+        self.assertLess(a2a.total_cost, mesh.total_cost)
+        self.assertLess(a2a.max_hops, mesh.max_hops)
+
+    def test_accumulate_max_hops_persists(self):
+        # accumulate_max_hops keeps a running sum per network key: h after one call, 2 * h after two.
+        model = AllToAllTopologyModel()
+        h = AllToAllTopologyModel.HOPS_PER_TRANSFER
+        self.assertEqual(model.accumulate_max_hops("net", h), h)
+        self.assertEqual(model.accumulate_max_hops("net", h), 2 * h)
diff --git a/tests/not_working/networks.ipynb b/tests/not_working/networks.ipynb
index 9532c809..1687d547 100644
--- a/tests/not_working/networks.ipynb
+++ b/tests/not_working/networks.ipynb
@@ -1,102 +1,197 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "43938186",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import accelforge as af"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "88205db3",
-   "metadata": {},
-   "source": [
-    "Below, we render a completely hierarchical architecture with two networks."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "49a31e7a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "spec = af.Spec.from_yaml(af.examples.arches.networked.hierarchical)\n",
-    "spec.arch"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "389cb739",
-   "metadata": {},
-   "source": [
-    "Now, we render an architecture with certain components in a flat organization, and others in a hierarchy."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9a11eec1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "spec = af.Spec.from_yaml(af.examples.arches.networked.flat)\n",
-    "spec.arch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d2bbda8a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "spec.calculate_component_costs()._get_flattened_architecture()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a6a508a5",
-   "metadata": {},
-   "source": [
-    "Finally, here is a simplified rack-scale architecture."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cc2df4b6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "spec = af.Spec.from_yaml(af.examples.arches.networked.rack)\n",
-    "spec.arch"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
+    "cells": [
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "43938186",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "import accelforge as af"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "88205db3",
+            "metadata": {},
+            "source": [
+                "Below, we render a completely hierarchical architecture with two networks."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "49a31e7a",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "spec = af.Spec.from_yaml(\n",
+                "    af.examples.arches.networked.hierarchical,\n",
+                "    af.examples.workloads.matmuls,\n",
+                "    jinja_parse_data={\"N_EINSUMS\": 1}\n",
+                ")\n",
+                "spec.arch"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "a64424bb",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "spec.mapper.metrics = af.mapper.Metrics.LATENCY | af.mapper.Metrics.ENERGY\n",
+                "result = spec.map_workload_to_arch()"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "9cc6ed1d",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "result.data"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "f0dadcac",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "result.energy(per_component=True)"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "389cb739",
+            "metadata": {},
+            "source": [
+                "Now, we render an architecture with certain components in a flat organization, and others in a hierarchy."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "9a11eec1",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "spec = af.Spec.from_yaml(\n",
+                "    af.examples.arches.networked.flat,\n",
+                "    af.examples.workloads.matmuls,\n",
+                "    jinja_parse_data={\"N_EINSUMS\": 1}\n",
+                ")\n",
+                "spec.arch"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "d2bbda8a",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "result = spec.map_workload_to_arch()\n",
+                "result"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "1e2b3332",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "from accelforge.plotting.mappings import plot_energy_breakdown\n",
+                "\n",
+                "plot_energy_breakdown([result], separate_by=[\"component\"], stack_by=[\"tensor\"])"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "741719fa",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "spec = af.Spec.from_yaml(\n",
+                "    af.examples.arches.networked.flat,\n",
+                "    af.examples.workloads.matmuls,\n",
+                "    jinja_parse_data={\"N_EINSUMS\": 1, \"N_ROW_BUFFER\": 1, \"N_COL_BUFFER\": 1},\n",
+                ")\n",
+                "spec.arch"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "a62e6dfa",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "result = spec.map_workload_to_arch()\n",
+                "plot_energy_breakdown([result], separate_by=[\"component\"], stack_by=[\"tensor\"])"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "929f5399",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "result.data[[c for c in result.data.columns if \"hops\" in c]]"
+            ]
+        },
+        {
+            "cell_type": "markdown",
+            "id": "a6a508a5",
+            "metadata": {},
+            "source": [
+                "Finally, here is a simplified rack-scale architecture."
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "cc2df4b6",
+            "metadata": {},
+            "outputs": [],
+            "source": [
+                "spec = af.Spec.from_yaml(af.examples.arches.networked.rack)\n",
+                "spec.arch"
+            ]
+        },
+        {
+            "cell_type": "code",
+            "execution_count": null,
+            "id": "3e54780a",
+            "metadata": {},
+            "outputs": [],
+            "source": []
+        }
+    ],
+    "metadata": {
+        "kernelspec": {
+            "display_name": "Python 3",
+            "language": "python",
+            "name": "python3"
+        },
+        "language_info": {
+            "codemirror_mode": {
+                "name": "ipython",
+                "version": 3
+            },
+            "file_extension": ".py",
+            "mimetype": "text/x-python",
+            "name": "python",
+            "nbconvert_exporter": "python",
+            "pygments_lexer": "ipython3"
+        }
+    },
+    "nbformat": 4,
+    "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/tests/test_network.py b/tests/test_network.py
deleted file mode 100644
index e8b8d567..00000000
--- a/tests/test_network.py
+++ /dev/null
@@ -1,249 +0,0 @@
-from pathlib import Path
-from unittest import TestCase
-
-import accelforge as af
-
-INPUT_FILES_DIR = Path(__file__).parent / "input_files" / "networked"
-
-
-class TestParsing(TestCase):
-    def test_hierarchical(self):
-        spec = af.Spec.from_yaml(
-            # af.examples.arches.networked.hierarchical,
-            INPUT_FILES_DIR
-            / "hierarchical.yaml",
-        )
-        self.assertIn("PeArray", spec.arch.nodes)
-        self.assertEqual(spec.arch.nodes["PeArray"].get_fanout(), 1)
-        self.assertIn("Scratchpad", spec.arch.nodes)
-        self.assertEqual(spec.arch.nodes["Scratchpad"].get_fanout(), 4)
-        self.assertIn("MacArray", spec.arch.nodes)
-        self.assertEqual(spec.arch.nodes["MacArray"].get_fanout(), 1)
-
-        try:
-            spec = spec.calculate_component_costs()
-        except af.EvaluationError as e:
-            self.fail(e.message)
-
-    def test_flat(self):
-        spec = af.Spec.from_yaml(
-            # af.examples.arches.networked.flat,
-            INPUT_FILES_DIR
-            / "flat.yaml",
-        )
-        self.assertIn("NoC", spec.arch.nodes)
-        self.assertEqual(spec.arch.nodes["NoC"].get_fanout(), 1)
-        self.assertEqual(
-            {n.name for n in spec.arch.get_nodes_of_type(af.spec.Leaf)},
-            {
-                "MainMemory",
-                "GlobalBuffer",
-                "NoC",
-                "RowBuffer",
-                "ColumnBuffer",
-                "DistributedBuffer",
-                "Scratchpad",
-                "MAC",
-            },
-        )
-
-        try:
-            spec = spec.calculate_component_costs()
-        except af.EvaluationError as e:
-            self.fail(e.message)
-
-
-class TestModel(TestCase):
-    def test_hierarchical_1d(self):
-        M = 8
-        KN = 8
-        MAC_TILE = 2
-        M_TILE = 4
-        BITS_PER_VALUE = 8
-
-        spec = af.Spec.from_yaml(
-            af.examples.workloads.matmuls,
-            # af.examples.arches.networked.hierarchical,
-            INPUT_FILES_DIR / "hierarchical_1d.yaml",
-            # af.examples.mappings.one_matmul_to_networked_hierarchical,
-            INPUT_FILES_DIR / "one_matmul_to_networked_hierarchical_1d.yaml",
-            jinja_parse_data={
-                "N_EINSUMS": 1,
-                "M": 8,
-                "KN": 8,
-                "MAC_TILE": MAC_TILE,
-                "M_TILE": M_TILE,
-            },
-        )
-        result = spec.evaluate_mapping()
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE)  # number of used Scratchpad
-            * M_TILE
-            * KN  # temporal for n1 in mapping
-            * sum(i + 1 for i in range(MAC_TILE))  # unicast along X-axis of MacArray
-            * BITS_PER_VALUE,
-        )
-        # NOTE: assuming XY routing (as defined in mapping)
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T1<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE)
-            * M_TILE
-            * KN  # temporal for n1 in mapping
-            * MAC_TILE  # multicast along X-axis of MacArray
-            * BITS_PER_VALUE,
-        )
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>W0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE)
-            * M_TILE
-            * KN
-            * sum(i + 1 for i in range(MAC_TILE))
-            * BITS_PER_VALUE,
-        )
-
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * sum(
-                i + 1 for i in range(KN // MAC_TILE)
-            )  # unicast along X-axis of PeArray
-            * M_TILE
-            * MAC_TILE
-            * BITS_PER_VALUE,
-        )
-        # NOTE: assuming XY routing (as defined in mapping)
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T1<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * KN
-            // MAC_TILE  # multicast along X-axis of PeArray
-            * M_TILE
-            * KN
-            * BITS_PER_VALUE,
-        )
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>W0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * sum(i + 1 for i in range(KN // MAC_TILE))  # unicast along PeArray
-            * MAC_TILE
-            * KN
-            * BITS_PER_VALUE,
-        )
-
-    def test_hierarchical(self):
-        M = 8
-        KN = 8
-        MAC_TILE = 2
-        PE_TILE = KN // MAC_TILE
-        M_TILE = 4
-        BITS_PER_VALUE = 8
-
-        spec = af.Spec.from_yaml(
-            af.examples.workloads.matmuls,
-            # af.examples.arches.networked.hierarchical,
-            INPUT_FILES_DIR / "hierarchical.yaml",
-            # af.examples.mappings.one_matmul_to_networked_hierarchical,
-            INPUT_FILES_DIR / "one_matmul_to_networked_hierarchical.yaml",
-            jinja_parse_data={
-                "N_EINSUMS": 1,
-                "M": 8,
-                "KN": 8,
-                "MAC_TILE": MAC_TILE,
-                "M_TILE": M_TILE,
-            },
-        )
-        result = spec.evaluate_mapping()
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE) ** 2
-            * M_TILE
-            * (
-                sum(i + 1 for i in range(MAC_TILE))  # unicasting along X
-                + MAC_TILE * MAC_TILE  # multicast along Y for each column
-            )
-            * BITS_PER_VALUE,
-        )
-        # NOTE: assuming XY routing (as defined in mapping)
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>T1<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE) ** 2
-            * M_TILE
-            * (
-                MAC_TILE
-                * MAC_TILE  # multicast along X (the tile is shape N1, which is MAC_TILE here)
-                + MAC_TILE
-                * sum(i + 1 for i in range(MAC_TILE))  # unicasting along Y for each row
-            )
-            * BITS_PER_VALUE,
-        )
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>MacArray<SEP>W0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (KN / MAC_TILE) ** 2
-            * M_TILE
-            * (
-                MAC_TILE
-                * sum(
-                    i + 1 for i in range(MAC_TILE)
-                )  # unicast along X (the tile is shape N1, which is MAC_TILE here)
-                + MAC_TILE
-                * sum(i + 1 for i in range(MAC_TILE))  # unicasting along Y for each row
-            )
-            * BITS_PER_VALUE,
-        )
-
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T0<SEP>hops"].iloc[0],
-            (M / M_TILE) * (sum(i + 1 for i in range(PE_TILE)) + PE_TILE * PE_TILE)
-            # tile shape
-            * M_TILE * MAC_TILE * BITS_PER_VALUE,
-        )
-        # NOTE: assuming XY routing (as defined in mapping)
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>T1<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (PE_TILE * PE_TILE + PE_TILE * sum(i + 1 for i in range(PE_TILE)))
-            * M_TILE
-            * MAC_TILE
-            * BITS_PER_VALUE,
-        )
-        self.assertEqual(
-            result.data["Matmul0<SEP>action<SEP>PeArray<SEP>W0<SEP>hops"].iloc[0],
-            (M / M_TILE)
-            * (
-                PE_TILE * sum(i + 1 for i in range(PE_TILE))
-                + PE_TILE * sum(i + 1 for i in range(PE_TILE))
-            )
-            * MAC_TILE**2
-            * BITS_PER_VALUE,
-        )
-
-
-class TestMapper(TestCase):
-    def test_hierarchical(self):
-        M = 8
-        KN = 8
-        MAC_TILE = 2
-        PE_TILE = KN // MAC_TILE
-        M_TILE = 4
-        BITS_PER_VALUE = 8
-
-        spec = af.Spec.from_yaml(
-            af.examples.workloads.matmuls,
-            # af.examples.arches.networked.hierarchical,
-            INPUT_FILES_DIR / "hierarchical.yaml",
-            jinja_parse_data={
-                "N_EINSUMS": 1,
-                "M": 8,
-                "KN": 8,
-                "MAC_TILE": MAC_TILE,
-                "M_TILE": M_TILE,
-            },
-        )
-        result = spec.map_workload_to_arch()