FACTSlab · aaronstevenwhite · May 7, 2026 · May 6, 2026 · May 6, 2026 · May 7, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,143 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0] - 2026-05-07
+
+### Added
+
+#### Pipeline-wide integration of the protocol layer
+
+- `bead.labels` is the single canonical home for the
+  `[[label]]` / `[[label:text]]` / `[[label|transform]]` syntax.
+  `parse_label_refs`, `find_label_names`, and `replace_label_refs`
+  replace the three independent regex implementations that previously
+  lived in `bead.protocol.drift`, `bead.deployment.jspsych.trials`,
+  and `bead.items.span_labeling`.
+- `bead.config.protocol.ProtocolConfig` plugs into `BeadConfig.protocol`
+  with declarative TOML/YAML configuration: anchor specs, drift
+  settings, realization strategies (template / contextual / lm), and
+  family composition. `ProtocolConfig.build(lm_client=..., cache=...)`
+  materializes a live `AnnotationProtocol`.
+- `bead.protocol.items` provides the canonical
+  `QuestionRealization → Item` and protocol-wide
+  `family_to_item_template` / `protocol_to_item_templates` /
+  `realize_protocol_to_items` bridges, plus `scale_type_to_task_type`
+  as the single canonical mapping from `ScaleType` to `TaskType`.
+- `bead.active_learning.models.registry` exposes
+  `MODEL_CLASSES` / `CONFIG_CLASSES` and
+  `model_class_for_task_type` / `config_class_for_task_type` /
+  `model_class_for_encoding` / `config_class_for_encoding` as the
+  single canonical task-type → model-class / config-class registry.
+  `bead.cli.models` and `bead.cli.training` consume the registry
+  directly, replacing two parallel string-keyed dicts and a dynamic
+  `_import_class` helper.
+- `bead.deployment.protocol_trials.protocol_to_jspsych_trials` is the
+  canonical end-to-end bridge from an `AnnotationProtocol` and a
+  sequence of `ProtocolContext` records to a flat list of jsPsych
+  trial dicts.
+- `bead.data_collection.jatos_results_to_annotation_records` converts
+  raw JATOS results into `AnnotationRecord` instances, the input
+  shape consumed by `annotator_reliability` and
+  `InterAnnotatorMetrics`.
+- `bead protocol` CLI subcommand: `bead protocol validate`,
+  `bead protocol realize`, `bead protocol items` drive the
+  configured protocol from the shell.
+
+### Changed
+
+- `LMRealization` accepts a `ModelOutputCache` (the bead-wide
+  content-addressable cache) via its required `cache` keyword and a
+  required `model_name` keyword for cache-key isolation. The internal
+  FIFO dict and the `cache` / `max_cache_size` / `clear_cache` /
+  `cache_size` parameters and methods are removed; the
+  `ModelOutputCache` is the single canonical caching surface.
+- `bead.cli.models` no longer maintains `TASK_TYPE_MODELS` /
+  `TASK_TYPE_CONFIGS` string-path dicts or the `_import_class`
+  helper; they are replaced by direct calls into
+  `bead.active_learning.models.registry`. `bead.cli.training` follows
+  the same pattern.
+- `bead.deployment.jspsych.trials._parse_prompt_references`,
+  `_SpanReference`, `_SPAN_REF_PATTERN`, and the duplicated
+  `_SPAN_REF_PATTERN` in `bead.items.span_labeling` are removed in
+  favor of `bead.labels.parse_label_refs` / `LabelRef`.
+
+#### `bead.protocol`: annotation protocol primitives
+
+A new top-level package providing a type-theoretic stack for defining
+annotation protocols: anchors as types, contexts as dependent
+indices, realization strategies as computational content, and drift
+guards as type-checkers.
+
+- `bead.protocol.anchor` defines `SemanticAnchor` (the type-level
+  spec of a question, with required span labels, required keywords,
+  optional embedding center and `max_drift`) and `ResponseSpace` /
+  `SemanticPoles`.
+- `bead.protocol.context` defines a generic `ProtocolContext` and
+  `ContextItem` plus a module-level **predicate registry**
+  (`register_context_predicate`, `get_context_predicate`,
+  `list_context_predicates`) for callers to register named context
+  predicates at import time.
+- `bead.protocol.realization` provides `RealizationStrategy`
+  (`typing.Protocol`), `TemplateRealization`,
+  `ContextualTemplateRealization` (rule-based selection from ranked
+  variants), and `LMRealization` (cached through `ModelOutputCache`)
+  plus an `LMClient` `Protocol` with explicit
+  `temperature` / `max_tokens` keyword parameters.
+- `bead.protocol.drift` defines `DriftScore`, the `DriftValidator`
+  `Protocol`, and three concrete validators
+  (`StructuralDriftValidator`, `EmbeddingDriftValidator`,
+  `PerplexityDriftValidator`) plus a composite `DriftGuard`. The
+  embedding and perplexity validators consume narrow
+  `EmbeddingAdapter` / `PerplexityAdapter` `Protocol`s, so any object
+  exposing the right method (including bead's
+  `bead.items.adapters.ModelAdapter`) conforms.
+- `bead.protocol.family` defines `QuestionFamily` (with explicit
+  `depends_on` for conditional dependencies) and `AnnotationProtocol`
+  (the iterated dependent product), with `realize_all` threading
+  responses through the context. `AnnotationProtocol` rejects
+  duplicate anchor names, self-dependencies, and forward / unknown
+  `depends_on` references at construction and on `append`.
+- `bead.protocol.encoding` defines `ScaleType`
+  (`StrEnum: binary / ordinal / nominal`) and `ResponseEncoding` (with
+  invariant validators for `n_levels == len(labels)`, label
+  uniqueness, and `BINARY` having exactly 2 levels), plus
+  `encode_response_space` as the bridge from `ResponseSpace`.
+- `bead.protocol.diagnostics` defines `DiagnosticLevel`,
+  `DiagnosticRecord`, `DatasetReport` (immutable, with `with_*`
+  mutators), `ConditionalObservationValidator` (which operates on
+  `AnnotationProtocol.depends_on`), and the `RecordLike` `Protocol`
+  for the structural record shape consumed by the validator.
+- `LMRealization` raises `RuntimeError` on backend failures and on
+  empty / whitespace-only responses (instead of caching an empty
+  string).
+
+#### `bead.evaluation.reliability`: per-annotator reliability
+
+- `AnnotationRecord` is a `BeadBaseModel` with the canonical
+  `(annotator_id, item_id, question_name, response_label)` shape.
+- `annotator_reliability(records, encodings=...)` returns
+  per-annotator response distributions and Shannon entropy in bits,
+  optionally filtering unrecognized labels.
+- `low_entropy_annotators(profiles, threshold=...)` flags annotators
+  who collapse the response space.
+
+### Documentation
+
+- `docs/api/protocol.md` and `docs/api/evaluation.md` updates expose
+  the new modules through `mkdocstrings`.
+- `docs/user-guide/protocols.md` walks through anchors, contexts
+  (including the predicate registry and per-dependent attributes),
+  the three realization strategies, drift validation (with the named
+  `EmbeddingAdapter` and `PerplexityAdapter` Protocols), protocol
+  composition, the structural construction-time invariants, the
+  `encode_response_space` bridge to modeling, conditional-observation
+  diagnostics (including the `RecordLike` Protocol), and reliability.
+- The protocol layer is cross-linked from
+  `docs/user-guide/concepts.md`, `docs/user-guide/index.md`,
+  `docs/index.md`, the project `README.md`, and a new "Protocol layer"
+  paragraph in `docs/developer-guide/architecture.md` that places it
+  as a cross-cutting layer feeding into the existing 6-stage pipeline.
+
 ## [0.3.0] - 2026-05-06
 
 ### Changed

diff --git a/README.md b/README.md
@@ -92,6 +92,7 @@ lists.save("lists/experiment.jsonl")
 - **Constraint satisfaction**: batch and list-level constraints for balanced designs
 - **Model integration**: HuggingFace, OpenAI, Anthropic with caching
 - **Active learning**: uncertainty sampling with convergence detection
+- **Annotation protocols**: type-theoretic stack of `SemanticAnchor` (the question type), `ProtocolContext` (the dependent index), `RealizationStrategy` (template / contextual / LM phrasings), and `DriftGuard` (the type-checker over realized prompts), composed into conditional `AnnotationProtocol`s
 - **jsPsych 8.x**: Material Design UI with JATOS deployment
 
 ## CLI

diff --git a/bead/__init__.py b/bead/__init__.py
@@ -6,6 +6,6 @@
 
 from __future__ import annotations
 
-__version__ = "0.3.0"
+__version__ = "0.4.0"
 __author__ = "Aaron Steven White"
 __email__ = "aaron.white@rochester.edu"
diff --git a/bead/active_learning/config.py b/bead/active_learning/config.py
@@ -7,9 +7,9 @@
 import didactic.api as dx
 
 __all__ = [
-    "VarianceComponents",
-    "RandomEffectsSpec",
     "MixedEffectsConfig",
+    "RandomEffectsSpec",
+    "VarianceComponents",
 ]
 
 

diff --git a/bead/active_learning/models/__init__.py b/bead/active_learning/models/__init__.py
@@ -9,16 +9,32 @@
 from bead.active_learning.models.magnitude import MagnitudeModel
 from bead.active_learning.models.multi_select import MultiSelectModel
 from bead.active_learning.models.ordinal_scale import OrdinalScaleModel
+from bead.active_learning.models.registry import (
+    CONFIG_CLASSES,
+    MODEL_CLASSES,
+    ModelConfig,
+    config_class_for_encoding,
+    config_class_for_task_type,
+    model_class_for_encoding,
+    model_class_for_task_type,
+)
 
 __all__ = [
+    "CONFIG_CLASSES",
+    "MODEL_CLASSES",
     "ActiveLearningModel",
     "BinaryModel",
     "CategoricalModel",
     "ClozeModel",
     "ForcedChoiceModel",
     "FreeTextModel",
     "MagnitudeModel",
+    "ModelConfig",
     "ModelPrediction",
     "MultiSelectModel",
     "OrdinalScaleModel",
+    "config_class_for_encoding",
+    "config_class_for_task_type",
+    "model_class_for_encoding",
+    "model_class_for_task_type",
 ]
diff --git a/bead/active_learning/models/registry.py b/bead/active_learning/models/registry.py
@@ -0,0 +1,182 @@
+"""Single canonical registry mapping task types to active-learning models.
+
+Each of the eight task types registered here corresponds to exactly one
+:class:`~bead.active_learning.models.base.ActiveLearningModel` subclass
+and one
+:class:`~bead.config.active_learning.BaseEncoderModelConfig`-derived
+config class. This module exposes those two mappings as the single
+source of truth used by:
+
+- :mod:`bead.cli.models` and :mod:`bead.cli.training` (CLI training commands)
+- :mod:`bead.protocol.items` (protocol-layer integration)
+- :func:`model_class_for_encoding` (protocol-encoding-driven model selection)
+
+There is no other place in the codebase that maps task types to model
+or config classes. Adding a new task type requires updating both
+mappings here and registering the new model module in
+:mod:`bead.active_learning.models`.
+"""
+
+from __future__ import annotations
+
+from typing import Final
+
+from bead.active_learning.models.base import ActiveLearningModel
+from bead.active_learning.models.binary import BinaryModel
+from bead.active_learning.models.categorical import CategoricalModel
+from bead.active_learning.models.cloze import ClozeModel
+from bead.active_learning.models.forced_choice import ForcedChoiceModel
+from bead.active_learning.models.free_text import FreeTextModel
+from bead.active_learning.models.magnitude import MagnitudeModel
+from bead.active_learning.models.multi_select import MultiSelectModel
+from bead.active_learning.models.ordinal_scale import OrdinalScaleModel
+from bead.config.active_learning import (
+    BinaryModelConfig,
+    CategoricalModelConfig,
+    ClozeModelConfig,
+    ForcedChoiceModelConfig,
+    FreeTextModelConfig,
+    MagnitudeModelConfig,
+    MultiSelectModelConfig,
+    OrdinalScaleModelConfig,
+)
+from bead.items.item_template import TaskType
+from bead.protocol.encoding import ResponseEncoding
+from bead.protocol.items import scale_type_to_task_type
+
+type ModelConfig = (
+    BinaryModelConfig
+    | CategoricalModelConfig
+    | ClozeModelConfig
+    | ForcedChoiceModelConfig
+    | FreeTextModelConfig
+    | MagnitudeModelConfig
+    | MultiSelectModelConfig
+    | OrdinalScaleModelConfig
+)
+"""Union of every active-learning model-config class."""
+
+
+MODEL_CLASSES: Final[dict[TaskType, type[ActiveLearningModel]]] = {
+    "binary": BinaryModel,
+    "categorical": CategoricalModel,
+    "cloze": ClozeModel,
+    "forced_choice": ForcedChoiceModel,
+    "free_text": FreeTextModel,
+    "magnitude": MagnitudeModel,
+    "multi_select": MultiSelectModel,
+    "ordinal_scale": OrdinalScaleModel,
+}
+"""The canonical task-type → model-class mapping.
+
+Add a new task type by appending an entry here and a matching entry
+in :data:`CONFIG_CLASSES`. Every keyed task type must be a
+``TaskType`` literal (the ``"span_labeling"`` task type has no
+active-learning model and is intentionally absent).
+"""
+
+
+CONFIG_CLASSES: Final[dict[TaskType, type[ModelConfig]]] = {
+    "binary": BinaryModelConfig,
+    "categorical": CategoricalModelConfig,
+    "cloze": ClozeModelConfig,
+    "forced_choice": ForcedChoiceModelConfig,
+    "free_text": FreeTextModelConfig,
+    "magnitude": MagnitudeModelConfig,
+    "multi_select": MultiSelectModelConfig,
+    "ordinal_scale": OrdinalScaleModelConfig,
+}
+"""The canonical task-type → config-class mapping."""
+
+
+def model_class_for_task_type(task_type: TaskType) -> type[ActiveLearningModel]:
+    """Return the model class registered for ``task_type``.
+
+    Parameters
+    ----------
+    task_type : TaskType
+        Task-type literal.
+
+    Returns
+    -------
+    type[ActiveLearningModel]
+        The registered subclass.
+
+    Raises
+    ------
+    KeyError
+        If ``task_type`` has no registered model (for example,
+        ``"span_labeling"``).
+    """
+    return MODEL_CLASSES[task_type]
+
+
+def config_class_for_task_type(task_type: TaskType) -> type[ModelConfig]:
+    """Return the config class registered for ``task_type``.
+
+    Parameters
+    ----------
+    task_type : TaskType
+        Task-type literal.
+
+    Returns
+    -------
+    type[ModelConfig]
+        The registered config class.
+
+    Raises
+    ------
+    KeyError
+        If ``task_type`` has no registered config.
+    """
+    return CONFIG_CLASSES[task_type]
+
+
+def model_class_for_encoding(
+    encoding: ResponseEncoding,
+) -> type[ActiveLearningModel]:
+    """Pick the active-learning model class for a protocol encoding.
+
+    Composes :func:`~bead.protocol.items.scale_type_to_task_type` with
+    :func:`model_class_for_task_type`. This is the canonical bridge
+    from a :class:`~bead.protocol.ResponseEncoding` to the model
+    class that should be trained on responses recorded under that
+    encoding.
+
+    Parameters
+    ----------
+    encoding : ResponseEncoding
+        Protocol-side response encoding.
+
+    Returns
+    -------
+    type[ActiveLearningModel]
+        The matching model class.
+
+    Examples
+    --------
+    >>> from bead.protocol import ResponseSpace, encode_response_space
+    >>> rs = ResponseSpace(options=("no", "yes"), is_ordered=False)
+    >>> enc = encode_response_space("dynamicity", rs)
+    >>> model_class_for_encoding(enc).__name__
+    'BinaryModel'
+    """
+    return model_class_for_task_type(scale_type_to_task_type(encoding.scale_type))
+
+
+def config_class_for_encoding(
+    encoding: ResponseEncoding,
+) -> type[ModelConfig]:
+    """Pick the active-learning config class for a protocol encoding.
+
+    Parameters
+    ----------
+    encoding : ResponseEncoding
+        Protocol-side response encoding.
+
+    Returns
+    -------
+    type[ModelConfig]
+        The matching config class.
+    """
+    return config_class_for_task_type(scale_type_to_task_type(encoding.scale_type))
diff --git a/bead/cli/main.py b/bead/cli/main.py
@@ -418,6 +418,7 @@ def _lazy_load(self, cmd_name: str) -> click.Command:
         "items": ("bead.cli.items", "items"),
         "lists": ("bead.cli.lists", "lists"),
         "models": ("bead.cli.models", "models"),
+        "protocol": ("bead.cli.protocol", "protocol"),
         "resources": ("bead.cli.resources", "resources"),
         "shell": ("bead.cli.shell", "shell"),
         "simulate": ("bead.cli.simulate", "simulate"),