diff --git a/clams/app/__init__.py b/clams/app/__init__.py
index 1d3f0a2..88f612b 100644
--- a/clams/app/__init__.py
+++ b/clams/app/__init__.py
@@ -9,11 +9,11 @@
 from datetime import datetime
 from urllib import parse as urlparser
 
-__all__ = ['ClamsApp']
+__all__ = ['ClamsApp', 'ClamsPromptableApp', 'ClamsHFPromptableApp']
 
-from typing import Union, Any, Optional, Dict, List, Tuple
+from typing import Union, Any, Optional, Dict, List, Tuple, cast
 
-from mmif import Mmif, Document, DocumentTypes, View
+from mmif import Mmif, Document, DocumentTypes, View, AnnotationTypes
 from mmif.utils.video_document_helper import (
     SamplingMode, SAMPLING_MODE_DESCRIPTIONS, SAMPLING_MODE_DEFAULT,
     _sampling_mode,
@@ -75,7 +75,7 @@ class ClamsApp(ABC):
         # how vdh.extract_frames_by_mode() selects frames from TimeFrames.
         # The value is intercepted in annotate() and pushed into a
         # contextvars.ContextVar so that any vdh call inside _annotate()
-        # picks it up automatically — app developers never need to handle
+        # picks it up automatically; app developers never need to handle
         # this parameter themselves.
         {
             'name': 'tfSamplingMode', 'type': 'string',
@@ -116,7 +116,7 @@ def appmetadata(self, **kwargs: List[str]) -> str:
         """
         # cast only, no refinement
         casted = self.metadata_param_caster.cast(kwargs)
-        pretty = casted.pop('pretty') if 'pretty' in casted else False
+        pretty = casted.get('pretty', False)
         return self.metadata.jsonify(pretty)
     
     def _load_appmetadata(self) -> AppMetadata:
@@ -131,7 +131,7 @@ def _load_appmetadata(self) -> AppMetadata:
         In any case, :class:`~clams.appmetadata.AppMetadata` class must be useful.
         
         For metadata specification, 
-        see `https://sdk.clams.ai/appmetadata.jsonschema <../appmetadata.jsonschema>`_. 
+        see `https://clams.ai/clams-python/appmetadata.jsonschema <../appmetadata.jsonschema>`_. 
         """
         cwd = pathlib.Path(sys.modules[self.__module__].__file__).parent
         
@@ -185,7 +185,7 @@ def annotate(self, mmif: Union[str, dict, Mmif], **runtime_params: List[str]) ->
         refined = self._refine_params(**runtime_params)
         self.logger.debug(f"Refined parameters: {refined}")
         pretty = refined.get('pretty', False)
-        sampling_mode_str = refined.pop('tfSamplingMode', None)
+        sampling_mode_str = refined.get('tfSamplingMode', None)
         if sampling_mode_str is not None:
             _sampling_mode.set(SamplingMode(sampling_mode_str))
         t = datetime.now()
@@ -639,6 +639,771 @@ def open_document_location(document: Union[str, Document], opener: Any = open, *
                     raise FileNotFoundError(p.path)
 
 
+# TODO (krim @ 05/28/26): maybe we should consider implementing
+# autodoc-based auto documentation export (e.g., ``automethod`` for
+# methods and a small Sphinx extension to render
+# ``promptable_parameters`` into the parameter table), instead of the
+# current hand-authored ``documentation/app-baseclasses.rst``.
+class ClamsPromptableApp(ClamsApp):
+    """
+    Base class for CLAMS apps that wrap a promptable model (an LLM or
+    other multimodal model, local or remote). Standardizes the runtime
+    parameter surface (prompt, generation hyperparameters, parallelism
+    control) and provides helpers for building chat conversations and
+    persisting model responses into MMIF.
+
+    The standardized parameters are listed in
+    :py:attr:`promptable_parameters` and added to an app's metadata via
+    :py:meth:`inject_promptable_parameters`. Promptable-app developers
+    MUST call that helper at the end of their ``appmetadata()`` function
+    in ``metadata.py``. The reservation rule (these parameter names are
+    SDK-managed and apps cannot redeclare them) is enforced implicitly
+    via :py:meth:`AppMetadata.add_parameter`'s existing duplicate-name
+    check.
+
+    Inference is performed by :py:meth:`generate`, which subclasses MUST
+    implement. The base class provides:
+
+    * :py:meth:`inject_promptable_parameters` : adds the SDK-managed
+      parameter set to ``AppMetadata``
+    * :py:meth:`build_conversation` : assembles a chat-template-compatible
+      message list from a prompt plus optional images/audios
+    * :py:meth:`response_to_grounded_textdocument` : persists a
+      generated response into a view as ``TextDocument`` +
+      ``Alignment`` (+ optional ``origins`` / ``origination``)
+    """
+
+    #: SDK-managed runtime parameters injected into every promptable app.
+    #: These names are reserved; apps cannot redeclare them with
+    #: customized specs.
+    promptable_parameters = [
+        {
+            'name': 'prompt', 'type': 'string', 'multivalued': True,
+            'description':
+                'User prompt(s) sent to the model. A single value runs as a '
+                'one-shot generation. A multi-value list is interpreted as a '
+                'multi-turn static prompt; see ``promptMode`` for how turns '
+                'are assembled.',
+        },
+        {
+            'name': 'systemPrompt', 'type': 'string', 'default': '',
+            'description':
+                'Optional system-role text prepended to the conversation. '
+                'Empty by default.',
+        },
+        {
+            'name': 'promptMode', 'type': 'string',
+            'choices': ['user-only', 'turn-taking'],
+            'default': 'turn-taking',
+            'description':
+                'How to interpret a multi-value ``prompt`` list. '
+                'Has no effect when ``prompt`` has a single value. '
+                'For semantics of each choice and worked examples, see '
+                'https://clams.ai/clams-python/app-baseclasses.html#promptable-multiturn',
+        },
+        {
+            'name': 'maxNewTokens', 'type': 'integer', 'default': 512,
+            'description':
+                'Maximum number of new tokens generated per inference call. '
+                'Forwarded to the backend\'s ``generate``-equivalent. Larger '
+                'values grow the KV cache linearly and increase GPU memory '
+                'usage; reduce if VRAM is constrained.',
+        },
+        {
+            'name': 'temperature', 'type': 'number', 'default': 0.0,
+            'description':
+                'Sampling temperature. The default ``0.0`` selects '
+                'deterministic / greedy decoding for maximum reproducibility; '
+                'override for sampled generation.',
+        },
+        {
+            'name': 'topP', 'type': 'number', 'default': 1.0,
+            'description':
+                'Nucleus-sampling cumulative probability cutoff. Only '
+                'meaningful when ``temperature`` is greater than 0.',
+        },
+        {
+            'name': 'topK', 'type': 'integer', 'default': 50,
+            'description':
+                'Top-K sampling cutoff. Only meaningful when ``temperature`` '
+                'is greater than 0.',
+        },
+        {
+            'name': 'parallelPrompts', 'type': 'integer', 'default': 1,
+            'description':
+                'Number of independent prompts the app runs in parallel '
+                '(stacks into a single forward pass). The *size* of each '
+                'prompt (how many images, how long the system/user text '
+                'is, etc.) is NOT regulated by this parameter; that is '
+                'each app\'s responsibility. Prompt count and per-prompt '
+                'content size combine multiplicatively for GPU memory, '
+                'so the two can blow up together. Catastrophic example: '
+                '``tfSamplingMode=all`` on a TimeFrame without '
+                '``targets`` expands that TF into one image per '
+                'native-FPS frame (300 images for a 10-second TF at '
+                '30fps); ``parallelPrompts=4`` then runs 4 such prompts '
+                'in one forward pass (~1200 images), guaranteed OOM. '
+                'Keep at ``1`` on memory-tight setups; raise only when '
+                'per-prompt content is small and bounded.',
+        },
+    ]
+
+    @staticmethod
+    def inject_promptable_parameters(metadata: AppMetadata) -> None:
+        """
+        Register every entry of :py:attr:`promptable_parameters` on
+        ``metadata``, in declaration order. Apps that subclass
+        :py:class:`ClamsPromptableApp` are expected to invoke this as
+        the last step of ``appmetadata()`` in ``metadata.py``.
+
+        No explicit reservation check is done here. A promptable name
+        (``prompt``, ``topK``, ...) that the app registered earlier is
+        expected to surface as the duplicate-name ``ValueError`` of
+        :py:meth:`AppMetadata.add_parameter`, raised from this
+        helper's own ``add_parameter`` call.
+
+        :param metadata: the :class:`AppMetadata` instance being built
+        """
+        for spec in ClamsPromptableApp.promptable_parameters:
+            metadata.add_parameter(**spec)
+
+    def __init__(self):
+        # Loading ``metadata.py`` is the parent constructor's job, and
+        # ``appmetadata()`` in that file is where the developer is
+        # required to call ``inject_promptable_parameters()``. So once
+        # ``super().__init__()`` returns, the promptable parameters
+        # should already be part of ``self.metadata.parameters``; this
+        # constructor adds no parameters of its own and only verifies
+        # that the injection really happened.
+        super().__init__()
+        registered = {param.name for param in self.metadata.parameters}
+        absent = sorted(
+            spec['name'] for spec in ClamsPromptableApp.promptable_parameters
+            if spec['name'] not in registered)
+        if absent:
+            raise ValueError(
+                f"Promptable parameters {absent} are missing "
+                f"from the app metadata. Promptable apps must call "
+                f"``ClamsPromptableApp.inject_promptable_parameters("
+                f"metadata)`` inside their ``appmetadata()`` function "
+                f"in ``metadata.py``."
+            )
+
+    @abstractmethod
+    def generate(
+            self,
+            prompt: List[str],
+            system_prompt: str = '',
+            images: Optional[List[List[Any]]] = None,
+            audios: Optional[List[List[Any]]] = None,
+            prompt_mode: str = 'turn-taking',
+            **generation_params,
+    ) -> List[str]:
+        """
+        Run N independent prompts in one inference call and return N
+        outputs. Subclasses MUST implement this.
+
+        Each inner list of ``images`` / ``audios`` is the bundled
+        multimodal content for ONE prompt -- the model sees those
+        items as one composite input and produces one output. The
+        outer list spans N prompts processed in parallel (when the
+        backend supports it; sequentially otherwise).
+
+        * Single-prompt call: ``images=[[img1, img2]]`` -> one output
+          (composite over the two bundled images).
+        * Per-input broadcast: ``images=[[img1], [img2], [img3]]`` ->
+          three outputs (one per image). Caller assembles the
+          singleton-wrap shape.
+        * Multimodal pair: ``images=[[img1]], audios=[[au1]]`` -> one
+          output. When both ``images`` and ``audios`` are given they
+          must have the same outer length; index ``i`` of each pairs
+          into prompt ``i``.
+
+        :param prompt: a ``List[str]`` of prompt turns; one element is
+            one-shot, several are assembled per ``prompt_mode``.
+            NOTE(review): presumably shared by all N items -- confirm.
+        :param system_prompt: optional system-role text prepended to
+            the conversation. Applies to every prompt in the batch.
+        :param images: optional ``List[List[Any]]`` -- N groups, one
+            per prompt; each inner list is the bundled images for that
+            prompt.
+        :param audios: optional ``List[List[Any]]`` -- N groups, one
+            per prompt; each inner list is the bundled audio clips
+            for that prompt.
+        :param prompt_mode: ``"turn-taking"`` (default) or
+            ``"user-only"``; see :py:attr:`promptable_parameters`.
+        :param generation_params: any additional backend-specific
+            generation kwargs (``maxNewTokens``, ``temperature``,
+            ``topP``, ``topK``, etc.).
+        :return: a ``List[str]`` with one entry per prompt in the
+            batch. For ``prompt_mode='user-only'`` multi-turn, each
+            prompt's entry is the assistant's final reply across its
+            N user turns.
+        :rtype: List[str]
+        """
+        raise NotImplementedError
+
+    def build_conversation(
+            self,
+            prompt: Union[str, List[str], List[dict]],
+            system_prompt: str = '',
+            images: Optional[List[Any]] = None,
+            audios: Optional[List[Any]] = None,
+            prompt_mode: str = 'turn-taking',
+    ) -> Union[List[dict], List[List[dict]]]:
+        """
+        Build a chat-template-compatible message list.
+
+        :param prompt: a plain string, a ``List[str]`` of prompt turns,
+            or a pre-built ``List[dict]`` of role/content message
+            objects (returned as-is; pass-through for advanced
+            callers that constructed the conversation themselves).
+        :param system_prompt: if non-empty, prepended as a
+            system-role message.
+        :param images: optional list of image inputs, each added as a
+            ``{'type': 'image', 'image': <input>}`` content entry.
+        :param audios: optional list of audio inputs, each added as a
+            ``{'type': 'audio', 'audio': <input>}`` content entry.
+            Media go on the only user turn, on the final user turn in
+            ``turn-taking`` mode, or on the first one in ``user-only``.
+        :param prompt_mode: ``"turn-taking"`` (default) or
+            ``"user-only"``. Only meaningful when ``prompt`` is a
+            multi-element list; ignored otherwise. See
+            :py:attr:`promptable_parameters` for semantics.
+
+        :returns:
+            * For single-shot prompts (string or single-element list)
+              and for multi-element ``turn-taking`` mode: a single
+              ``List[dict]`` of role/content messages, ready to feed
+              to a chat-template applier (e.g.,
+              ``processor.apply_chat_template``).
+            * For multi-element ``user-only`` mode: a
+              ``List[List[dict]]`` of N progressively-extending
+              conversation prefixes, one per user turn. Each prefix
+              ends in a user turn; assistant turns between users are
+              stored with ``content=None`` as placeholders for the
+              caller to fill in with successive generation results.
+        :raises ValueError: if ``prompt`` is empty, or if
+            ``prompt_mode`` is unknown for a multi-element ``prompt``.
+
+        Subclasses may override to use model-specific state
+        (``self.processor``, ``self.tokenizer``, etc.) when formatting.
+        """
+        # Pass-through for pre-built message lists.
+        if isinstance(prompt, list) and prompt and all(
+                isinstance(p, dict) for p in prompt):
+            return cast(List[dict], prompt)
+
+        # Normalize to List[str]; an empty list has no user turn to send.
+        prompts = [prompt] if isinstance(prompt, str) else list(prompt)
+        if not prompts:
+            raise ValueError("`prompt` must contain at least one turn.")
+
+        if len(prompts) == 1:
+            return self._build_single_turn(
+                prompts[0], system_prompt, images, audios)
+
+        if prompt_mode == 'turn-taking':
+            return self._build_turn_taking(
+                prompts, system_prompt, images, audios)
+        if prompt_mode == 'user-only':
+            return self._build_user_only(
+                prompts, system_prompt, images, audios)
+        raise ValueError(
+            f"Unknown prompt_mode: {prompt_mode!r}. "
+            f"Expected 'turn-taking' or 'user-only'.")
+
+    @staticmethod
+    def _make_user_content(text, images=None, audios=None):
+        """
+        Assemble a user message's content entries: every image first,
+        then every audio, then the text entry last.
+        """
+        # Falsy media arguments (None, empty list) contribute no entries.
+        parts = [{'type': 'image', 'image': item} for item in images or ()]
+        parts.extend(
+            {'type': 'audio', 'audio': clip} for clip in audios or ())
+        parts.append({'type': 'text', 'text': text})
+        return parts
+
+    def _build_single_turn(self, text, system_prompt, images, audios):
+        """One-shot conversation: optional system turn + one user turn."""
+        user_turn = {
+            'role': 'user',
+            'content': self._make_user_content(text, images, audios),
+        }
+        if not system_prompt:
+            return [user_turn]
+        return [{'role': 'system', 'content': system_prompt}, user_turn]
+
+    def _build_turn_taking(self, prompts, system_prompt, images, audios):
+        """
+        Build one conversation (one inference call) in which
+        ``prompts`` alternates roles by position: even positions are
+        user turns, odd positions are pre-written assistant exemplars.
+        Images/audios (if any) go on the last user turn only.
+        """
+        conversation = []
+        if system_prompt:
+            conversation.append({'role': 'system', 'content': system_prompt})
+        # Position of the last user turn, i.e. the largest even index.
+        final = len(prompts) - 1
+        query_pos = final - final % 2
+        for pos, turn_text in enumerate(prompts):
+            if pos % 2:
+                # Odd position: assistant exemplar, stored as plain text.
+                conversation.append({'role': 'assistant', 'content': turn_text})
+                continue
+            # Even position: user turn; only the query turn carries media.
+            if pos == query_pos:
+                body = self._make_user_content(turn_text, images, audios)
+            else:
+                body = self._make_user_content(turn_text, None, None)
+            conversation.append({'role': 'user', 'content': body})
+        return conversation
+
+    def _build_user_only(self, prompts, system_prompt, images, audios):
+        """
+        Return one conversation prefix per entry of ``prompts``; prefix
+        ``k`` ends with user turn ``k`` and contains every earlier
+        turn. Assistant slots between user turns hold
+        ``content=None`` for the caller to fill in.
+        """
+        running: List[dict] = (
+            [{'role': 'system', 'content': system_prompt}]
+            if system_prompt else [])
+        prefixes: List[List[dict]] = []
+        for turn_no, turn_text in enumerate(prompts):
+            if turn_no:
+                # Reserve the slot for the reply to the previous user
+                # turn before the next user turn is appended.
+                running.append({'role': 'assistant', 'content': None})
+                media = (None, None)
+            else:
+                # Only the opening user turn carries images/audios.
+                media = (images, audios)
+            running.append({
+                'role': 'user',
+                'content': self._make_user_content(turn_text, *media),
+            })
+            # Copy each message dict so that replacing a placeholder's
+            # ``content`` in one prefix leaves the other prefixes alone.
+            prefixes.append([dict(msg) for msg in running])
+        return prefixes
+
+    def response_to_grounded_textdocument(
+            self,
+            view: View,
+            source: str,
+            response: str,
+            origins: Optional[List[str]] = None,
+            origination: Optional[str] = None,
+            reasoning_trace: Optional[str] = None,
+    ) -> Tuple[Any, Any]:
+        """
+        Persist a single LLM text response into a view. Writes one
+        ``TextDocument`` (containing the response), one ``Alignment``
+        from ``source`` to it, and -- when given -- ``origins`` /
+        ``origination`` properties on the TD.
+
+        The two grounding link kinds are semantically distinct:
+
+        * ``source`` is the *coarse* cross-modal grounding -- the
+          single annotation id that the response is anchored to.
+          Written into the new ``Alignment`` (``source -> td``).
+          Typical value: the parent ``TimeFrame`` for a
+          captioning/OCR app.
+        * ``origins`` are the *finer* derivation grounding -- a list
+          of annotation ids the response was specifically derived
+          from (e.g. the ``TimePoint``\\s whose frames were fed to
+          the model). Written into ``TextDocument.origins``. See
+          https://clams.ai/clams-vocabulary/Document for vocabulary
+          semantics.
+
+        :param view: the :class:`View` to write into. The caller is
+            responsible for having called :meth:`View.new_contain`
+            for ``TextDocument`` and ``Alignment`` first if needed.
+        :param source: ``id`` of the annotation to record as the
+            cross-modal anchor of the response (see above).
+        :param response: the text generated by the model.
+        :param origins: optional list of ``id``\\s of annotations the
+            response was *derived* from. Must be paired with
+            ``origination``.
+        :param origination: nature of the derivation, written to
+            ``TextDocument.origination``. Accepted values per the
+            vocabulary include ``'derived'``, ``'transcription'``,
+            ``'topologically-identical'``. Must be paired with
+            ``origins``.
+        :param reasoning_trace: optional model-side reasoning trace
+            (a chain-of-thought / scratchpad string, NOT a Python
+            traceback). NOT YET SUPPORTED; storage convention is
+            still being decided at clamsproject/clams-python#263.
+        :return: ``(TextDocument, Alignment)`` tuple of the new
+            annotations.
+        :raises ValueError: if exactly one of ``origins`` /
+            ``origination`` is set.
+        :raises NotImplementedError: if ``reasoning_trace`` is not
+            ``None``; raised before anything is written to ``view``.
+        """
+        # Validate every argument before touching ``view``, so that a
+        # rejected call does not leave a stray TextDocument/Alignment.
+        if bool(origins) != bool(origination):
+            raise ValueError(
+                "`origins` and `origination` must be supplied together "
+                "or both omitted; got "
+                f"origins={origins!r}, origination={origination!r}."
+            )
+        if reasoning_trace is not None:
+            raise NotImplementedError(
+                "Reasoning-trace storage convention is not yet defined; "
+                "tracked at clamsproject/clams-python#263."
+            )
+        td = view.new_textdocument(text=response)
+        if origins:
+            td.add_property('origins', origins)
+            td.add_property('origination', origination)
+        align = view.new_annotation(
+            AnnotationTypes.Alignment,
+            source=source,
+            target=td.id,
+        )
+        return td, align
+
+
+class ClamsHFPromptableApp(ClamsPromptableApp):
+    """
+    Base class for promptable CLAMS apps backed by a local
+    HuggingFace ``transformers`` model. Layers HF-specific inference
+    plumbing on top of :class:`ClamsPromptableApp`: model loading
+    via :func:`clams.backends.hf.load_hf_model`, and a concrete
+    :py:meth:`generate` implementation that runs N independent
+    prompts in one HF forward pass via the standard
+    chat-template -> ``model.generate`` -> ``batch_decode`` pipeline.
+
+    Concrete subclasses declare the model class via :py:attr:`MODEL_CLS`
+    plus a handful of optional dtype/padding hints, and the family of
+    pinned model revisions via ``analyzer_versions`` in
+    ``metadata.py``. The SDK auto-derives a ``model`` runtime
+    parameter (choices = keys of ``analyzer_versions``), and the dev's
+    ``_annotate`` calls :py:meth:`load_model` to (lazily) load the
+    requested family member. Singleton families (one entry in
+    ``analyzer_versions``) eagerly pre-load in ``__init__`` so
+    single-model apps preserve warm-start semantics. Example::
+
+        class MyVLMCaptioner(ClamsHFPromptableApp):
+            MODEL_CLS = AutoModelForImageTextToText
+            DTYPE = torch.bfloat16
+            PADDING_SIDE = 'left'
+
+            # In metadata.py:
+            #     analyzer_versions={
+            #         "HuggingFaceTB/SmolVLM2-2.2B-Instruct": "482adb5",
+            #     }
+            # plus a call to
+            # ClamsHFPromptableApp.inject_promptable_parameters(metadata).
+
+            def _annotate(self, mmif, **parameters):
+                self.load_model(parameters['model'])
+                # ... self.generate(prompt, images=image_groups, ...)
+                # ... self.response_to_grounded_textdocument(...)
+                ...
+
+    Requires the ``[hf]`` extra (``pip install clams-python[hf]``).
+    """
+
+    #: ``transformers`` model class (e.g.
+    #: :class:`~transformers.AutoModelForImageTextToText`,
+    #: :class:`~transformers.AutoModelForCausalLM`). Subclasses MUST
+    #: set this.
+    MODEL_CLS: Optional[Any] = None
+    #: ``transformers`` processor / tokenizer / feature-extractor
+    #: class. Defaults to :class:`~transformers.AutoProcessor` (set
+    #: by :func:`clams.backends.hf.load_hf_model` when ``None``).
+    PROCESSOR_CLS: Optional[Any] = None
+    #: Torch dtype for the model (e.g. ``torch.bfloat16``). When
+    #: ``None``, the model class's own default is used (typically
+    #: float32). Also used to cast ``pixel_values`` in
+    #: :py:meth:`generate`.
+    DTYPE: Optional[Any] = None
+    #: Tokenizer padding side. Set to ``'left'`` for decoder-only
+    #: batched generation; leave ``None`` otherwise.
+    PADDING_SIDE: Optional[str] = None
+    #: Extra kwargs forwarded to ``MODEL_CLS.from_pretrained()``.
+    MODEL_KWARGS: Optional[dict] = None
+    #: Extra kwargs forwarded to ``PROCESSOR_CLS.from_pretrained()``.
+    PROCESSOR_KWARGS: Optional[dict] = None
+
+    @staticmethod
+    def inject_promptable_parameters(metadata: AppMetadata) -> None:
+        """
+        HF flavour of
+        :py:meth:`ClamsPromptableApp.inject_promptable_parameters`:
+        registers the SDK-managed promptable parameters first, then a
+        ``model`` parameter whose choices are the keys of
+        ``metadata.analyzer_versions``. Apps subclassing
+        :py:class:`ClamsHFPromptableApp` should call this one (not the
+        base helper) at the end of ``appmetadata()`` in ``metadata.py``.
+
+        :param metadata: the :class:`AppMetadata` instance being
+            built. Its ``analyzer_versions`` must already hold a
+            non-empty ``Dict[str, str]`` (model id -> commit hash);
+            with exactly one entry that model id also becomes the
+            ``model`` default.
+        :raises ValueError: if ``metadata.analyzer_versions`` is
+            falsy (e.g. ``None`` or empty).
+        """
+        ClamsPromptableApp.inject_promptable_parameters(metadata)
+        pinned = metadata.analyzer_versions
+        if not pinned:
+            raise ValueError(
+                "ClamsHFPromptableApp.inject_promptable_parameters "
+                "requires ``metadata.analyzer_versions`` to be a "
+                "non-empty dict (HF model id -> commit hash). Set "
+                "it on the ``AppMetadata`` constructor call before "
+                "invoking this helper.")
+        model_ids = [*pinned.keys()]
+        # A lone model doubles as the default; with several models the
+        # ``default`` passed below is ``None``.
+        sole_model = model_ids[0] if len(model_ids) == 1 else None
+        model_help = (
+            "HuggingFace model identifier to use for this "
+            "request. Must be one of the model ids declared in "
+            "this app's ``analyzer_versions``; the SDK pins the "
+            "corresponding commit hash at load time. When the "
+            "app ships a single model (the typical case), this "
+            "parameter defaults to that one model and can be "
+            "omitted. Pass the full HF model id (e.g. "
+            "``org/repo-name``); URL-encoding the ``/`` is "
+            "optional."
+        )
+        metadata.add_parameter(
+            name='model', type='string', choices=model_ids,
+            default=sole_model, multivalued=False,
+            description=model_help,
+        )
+
+    def __init__(self):
+        super().__init__()
+        app_name = type(self).__name__
+        if self.MODEL_CLS is None:
+            raise ValueError(
+                f"{app_name} must set the ``MODEL_CLS`` class attribute "
+                f"(a ``transformers`` model class).")
+        pinned = self.metadata.analyzer_versions
+        if not pinned:
+            raise ValueError(
+                f"{app_name} must declare ``analyzer_versions`` in "
+                f"``metadata.py`` as a non-empty Dict[str, str] "
+                f"mapping HuggingFace model ids to pinned commit "
+                f"hashes (7-char abbreviation is sufficient). This is "
+                f"required for reproducibility: an unpinned download "
+                f"silently floats on whatever ``main`` points at and "
+                f"cannot be reproduced. Singleton families (one "
+                f"entry) are fine; multi-model families list every "
+                f"member.")
+        if not any(p.name == 'model' for p in self.metadata.parameters):
+            raise ValueError(
+                f"{app_name} must call "
+                f"``ClamsHFPromptableApp.inject_promptable_parameters"
+                f"(metadata)`` (the HF override that also adds the "
+                f"``model`` parameter) inside ``appmetadata()`` in "
+                f"``metadata.py``; calling "
+                f"``ClamsPromptableApp.inject_promptable_parameters`` "
+                f"directly skips the ``model`` parameter and trips "
+                f"this check.")
+        #: Slots for the model currently in use; all ``None`` until
+        #: :py:meth:`load_model` runs (for a singleton family that
+        #: happens a few lines below, for larger families on the
+        #: first explicit ``load_model`` call).
+        self.device: Optional[str] = None
+        self.model: Any = None
+        self.processor: Any = None
+        #: Loaded ``(processor, model, device)`` triples, keyed by
+        #: ``(model_id, revision)``. Expected to be filled by
+        #: :py:meth:`load_model`; the dict lives as long as this
+        #: app instance does.
+        self._model_cache: Dict[Tuple[str, str], Tuple[Any, Any, str]] = {}
+        # A family with exactly one member is loaded right away so
+        # the first request does not pay the model-loading cost;
+        # families with several members are left to be loaded on
+        # demand through ``load_model``.
+        if len(pinned) == 1:
+            (sole_model_id,) = pinned.keys()
+            self.load_model(sole_model_id)
+
+    def _refine_params(self, **runtime_params):
+        """
+        Refine as the parent does, then rewrite a bare ``model`` id as
+        ``org/name@<revision>`` if ``analyzer_versions`` pins one.
+        """
+        params = super()._refine_params(**runtime_params)
+        requested = params.get('model')
+        if not isinstance(requested, str) or '@' in requested:
+            return params
+        pinned = (self.metadata.analyzer_versions or {}).get(requested)
+        if pinned is not None:
+            params['model'] = f"{requested}@{pinned}"
+        return params
+
+    def load_model(
+            self, model_id_or_with_rev: str,
+    ) -> Tuple[Any, Any, str]:
+        """
+        Return the ``(processor, model, device)`` triple for a
+        model, loading it on first use and caching it afterwards.
+
+        The argument may be refined (``org/name@rev``) or raw
+        (``org/name``); a raw id gets its revision from
+        ``self.metadata.analyzer_versions``. Results are cached per
+        ``(model_id, revision)``; on every call :py:attr:`self.processor`,
+        :py:attr:`self.model` and :py:attr:`self.device` are set to the
+        returned triple so later :py:meth:`generate` calls operate on it.
+
+        :param model_id_or_with_rev: HF model id, optionally with
+            ``@<revision>`` suffix.
+        :return: ``(processor, model, device)`` tuple for the loaded
+            model. Same references are also stored on ``self``.
+        :raises KeyError: if a raw model id is passed and is not in
+            ``analyzer_versions``.
+        """
+        name, sep, rev = model_id_or_with_rev.rpartition('@')
+        if not sep:
+            # Raw id: the pinned revision comes from app metadata.
+            name = model_id_or_with_rev
+            rev = self.metadata.analyzer_versions[name]
+        key = (name, rev)
+        loaded = self._model_cache.get(key)
+        if loaded is None:
+            # Import lazily so a base clams-python install does not
+            # pull in torch/transformers; apps using this class
+            # need the ``[hf]`` extra installed.
+            from clams.backends.hf import load_hf_model
+            self.logger.info(f"Loading HF model from {name} @ {rev}")
+            loaded = load_hf_model(
+                name,
+                self.MODEL_CLS,
+                processor_cls=self.PROCESSOR_CLS,
+                dtype=self.DTYPE,
+                padding_side=self.PADDING_SIDE,
+                revision=rev,
+                model_kwargs=self.MODEL_KWARGS,
+                processor_kwargs=self.PROCESSOR_KWARGS,
+            )
+            self.logger.info(f"HF model loaded on {loaded[2]}")
+            self._model_cache[key] = loaded
+        # Cache hit or fresh load: point the active references at it.
+        self.processor, self.model, self.device = loaded
+        return loaded
+
+    def generate(
+            self,
+            prompt: List[str],
+            system_prompt: str = '',
+            images: Optional[List[List[Any]]] = None,
+            audios: Optional[List[List[Any]]] = None,
+            prompt_mode: str = 'turn-taking',
+            **generation_params,
+    ) -> List[str]:
+        """
+        HuggingFace ``transformers`` implementation of the
+        :py:meth:`ClamsPromptableApp.generate` contract: N prompts go
+        through one batched ``model.generate`` call and come back as
+        N decoded strings.
+
+        ``images`` / ``audios`` hold one inner list per prompt. When
+        both are given their outer lengths must match (entries are
+        paired by index) or :class:`ValueError` is raised. When both
+        are ``None`` the call runs as a single text-only prompt. An
+        empty outer list yields ``[]``.
+
+        Pipeline: :py:meth:`build_conversation` ->
+        ``apply_chat_template`` -> ``model.generate`` ->
+        ``batch_decode``. Subclasses can adjust the message shape in
+        :py:meth:`build_conversation` and the generation kwargs in
+        :py:meth:`build_gen_kwargs` without touching this method.
+
+        Any exception raised inside that pipeline is logged and
+        swallowed, and N empty strings are returned instead. The
+        length check and :py:meth:`build_gen_kwargs` run outside
+        that guard, so their errors propagate.
+        """
+        if (images is not None and audios is not None
+                and len(images) != len(audios)):
+            raise ValueError(
+                f"images and audios must have the same outer length "
+                f"when both are given; got "
+                f"{len(images)} vs {len(audios)}.")
+        # Batch size follows whichever modality is present; with
+        # neither, this is a single text-only prompt.
+        bundles = images if images is not None else audios
+        n = 1 if bundles is None else len(bundles)
+        if n == 0:
+            return []
+        gen_kwargs = self.build_gen_kwargs(**generation_params)
+        try:
+            conversations = []
+            for idx in range(n):
+                conversations.append(self.build_conversation(
+                    prompt, system_prompt=system_prompt,
+                    images=None if images is None else images[idx],
+                    audios=None if audios is None else audios[idx],
+                    prompt_mode=prompt_mode))
+            inputs = self.processor.apply_chat_template(
+                conversations,
+                add_generation_prompt=True,
+                tokenize=True,
+                return_dict=True,
+                padding=True,
+                return_tensors="pt",
+            ).to(self.device)
+            if self.DTYPE is not None and 'pixel_values' in inputs:
+                pixel_values = inputs['pixel_values']
+                if pixel_values is not None:
+                    inputs['pixel_values'] = pixel_values.to(
+                        dtype=self.DTYPE)
+            output_ids = self.model.generate(**inputs, **gen_kwargs)
+            # Keep only the positions past the (padded) input length.
+            prompt_len = inputs.input_ids.shape[1]
+            return self.processor.batch_decode(
+                output_ids[:, prompt_len:], skip_special_tokens=True)
+        except Exception as err:
+            # Best effort: log, then one empty string per prompt.
+            self.logger.error(
+                f"Error processing batch: {err}", exc_info=True)
+            return [''] * n
+
+    @staticmethod
+    def build_gen_kwargs(
+            max_new_tokens: int = 512,
+            temperature: float = 0.0,
+            top_p: float = 1.0,
+            top_k: int = 50,
+            **_unused,
+    ) -> dict:
+        """
+        Translate the SDK's promptable-parameter values into
+        HuggingFace ``model.generate()`` kwargs. ``do_sample`` is
+        always set: ``False`` (greedy decoding) when
+        ``temperature <= 0``, ``True`` with the given
+        ``temperature`` / ``top_p`` / ``top_k`` otherwise.
+
+        Subclasses MAY override to add model-specific generation
+        kwargs (``num_beams``, ``repetition_penalty``, custom
+        stopping criteria, ``do_sample`` overrides, etc.). Extra
+        keyword args are accepted and ignored, so callers can pass
+        the full ``**parameters`` dict from ``_annotate`` unfiltered.
+        """
+        # Spell out ``do_sample=False`` for the greedy case: left
+        # unset, a checkpoint whose generation config enables
+        # sampling would still sample at ``temperature == 0``.
+        gen_kwargs = {'max_new_tokens': max_new_tokens, 'do_sample': False}
+        if temperature > 0:
+            gen_kwargs.update(
+                do_sample=True, temperature=temperature,
+                top_p=top_p, top_k=top_k)
+        return gen_kwargs
+
+
 class ParameterCaster(object):
 
     def __init__(self, param_spec: Dict[str, Tuple[str, bool]]):
diff --git a/clams/backends/__init__.py b/clams/backends/__init__.py
new file mode 100644
index 0000000..d9fe452
--- /dev/null
+++ b/clams/backends/__init__.py
@@ -0,0 +1,9 @@
+"""
+Optional model-backend helpers for CLAMS apps.
+
+Each backend is a separate submodule. Heavy dependencies (e.g.,
+``torch``, ``transformers``) are NOT pulled in by the base
+``clams-python`` install; users opt in via pip extras such as
+``pip install clams-python[hf]`` for the HuggingFace transformers
+backend.
+"""
diff --git a/clams/backends/hf.py b/clams/backends/hf.py
new file mode 100644
index 0000000..b2dcfab
--- /dev/null
+++ b/clams/backends/hf.py
@@ -0,0 +1,247 @@
+"""
+HuggingFace transformers backend helpers.
+
+Two general loaders that wrap the device / kwargs / inference-mode
+boilerplate every HF-backed CLAMS app does identically:
+
+* :func:`load_hf_model` -- ``from_pretrained()`` flow for any model
+  class (instruction-tuned LLMs/VLMs, encoder-only classifiers,
+  vision/audio feature extractors, etc.). Use when the app needs raw
+  access to the underlying model and processor.
+* :func:`load_hf_pipeline` -- task-level :func:`transformers.pipeline`
+  flow (ASR, NER, text classification, zero-shot, etc.). Use when
+  pipeline-level inference is sufficient.
+
+``torch`` and ``transformers`` are optional dependencies. Install them
+via the ``[hf]`` extra::
+
+    pip install clams-python[hf]
+
+Imports are lazy: this module can be referenced from
+:mod:`clams.app` without triggering an ``ImportError`` on a base
+``clams-python`` install. The :class:`ImportError` only fires when a
+loader is actually called without the extras.
+"""
+from typing import Any, Optional, Tuple, Union
+
+
+def load_hf_model(
+        model_id: str,
+        model_cls,
+        processor_cls=None,
+        dtype=None,
+        device: Optional[str] = None,
+        padding_side: Optional[str] = None,
+        revision: Optional[str] = None,
+        model_kwargs: Optional[dict] = None,
+        processor_kwargs: Optional[dict] = None,
+        move_to_device: bool = True,
+) -> Tuple[Any, Any, str]:
+    """
+    Load a HuggingFace ``transformers`` model via ``from_pretrained``
+    and return it ready for inference.
+
+    :param model_id: HuggingFace model identifier (e.g., a Hub repo
+        name or a local path) forwarded to ``from_pretrained``.
+    :param model_cls: a ``transformers`` model class (e.g.,
+        ``AutoModelForCausalLM``, ``AutoModelForImageTextToText``,
+        ``ConvNextV2Model``, ``ViTModel``, ...). Whatever supports
+        ``from_pretrained()``.
+    :param processor_cls: a processor / tokenizer / feature-extractor
+        class with ``from_pretrained()`` (``transformers.AutoProcessor``,
+        ``transformers.AutoTokenizer``, ``transformers.AutoImageProcessor``,
+        etc.). When ``None`` and ``processor_kwargs`` is also ``None``,
+        ``transformers.AutoProcessor`` is used; when ``None`` and
+        ``processor_kwargs`` is not ``None``, processor loading is skipped.
+    :param dtype: torch dtype for the model (e.g., ``torch.bfloat16``).
+        When ``None`` (default), no ``torch_dtype`` kwarg is forwarded
+        to ``from_pretrained`` -- the model class uses its own default
+        (typically float32). Set explicitly for low-precision LLM
+        inference.
+    :param device: target device string (e.g., ``'cuda'``, ``'cpu'``,
+        ``'cuda:0'``). When ``None`` (default), the helper auto-detects
+        cuda availability and falls back to cpu.
+    :param padding_side: if set (typically ``'left'`` for decoder-only
+        models doing batched generation), the helper configures the
+        underlying tokenizer's ``padding_side`` and -- when no pad
+        token is set -- uses the EOS token as the pad token. Leave
+        ``None`` for encoder / non-batched cases (the tokenizer's own
+        default is preserved).
+    :param revision: optional Git revision (commit hash, branch name,
+        or tag) on the Hub repository to pin the download to. When
+        set, forwarded as ``revision=...`` to both
+        ``model_cls.from_pretrained`` and
+        ``processor_cls.from_pretrained``, ensuring the model and
+        processor are loaded from the same commit. Strongly recommended
+        for production: pinning a commit hash makes the analyzer
+        artifact reproducible and immune to upstream silent updates.
+        Apps calling this helper directly should record the same hash
+        on ``analyzer_version`` (or ``analyzer_versions``) in
+        ``metadata.py`` so the output MMIF identifies the exact
+        artifact. Apps inheriting from
+        :class:`~clams.app.ClamsHFPromptableApp` do not call this
+        helper -- the base class reads ``analyzer_versions`` from the
+        app metadata and forwards the resolved revision automatically.
+    :param model_kwargs: extra kwargs forwarded to
+        ``model_cls.from_pretrained()`` (e.g.,
+        ``{'use_safetensors': True, 'add_pooling_layer': False}``).
+    :param processor_kwargs: extra kwargs forwarded to
+        ``processor_cls.from_pretrained()`` (e.g.,
+        ``{'use_safetensors': True, 'use_fast': True}``).
+    :param move_to_device: when ``True`` (default), the helper moves
+        the loaded model to the resolved device and switches it to
+        ``eval()`` mode -- the right behavior for a "ready for
+        inference" app loader. When ``False``, both steps are
+        skipped; the model is returned in the state
+        ``from_pretrained`` left it (no ``.to()`` / ``.eval()``). Use
+        ``False`` for library-style HF wrappers that defer device
+        placement and inference-mode switching to a downstream
+        consumer (e.g. an extractor class that may be combined with
+        a head and only then placed on a device by the wrapping
+        classifier). The returned ``device`` is still the resolved
+        target, so the consumer can use it later for its own
+        ``.to(device)`` call.
+
+    :returns: ``(processor, model, device)`` tuple. ``processor`` is
+        the loaded processor/tokenizer/feature-extractor (or ``None``
+        when processor loading was skipped; see ``processor_cls``).
+        ``device`` is the resolved device string (the model was moved
+        there iff ``move_to_device=True``).
+    :rtype: Tuple[Any, Any, str]
+    :raises ImportError: if ``torch`` or ``transformers`` is not
+        installed. Install the ``[hf]`` extra to fix.
+    """
+    try:
+        import torch  # pytype: disable=import-error
+    except ImportError as e:
+        raise ImportError(
+            "clams.backends.hf requires the `torch` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from e
+    try:
+        import transformers  # pytype: disable=import-error
+    except ImportError as e:
+        raise ImportError(
+            "clams.backends.hf requires the `transformers` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from e
+
+    resolved_device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Processor. A ``revision`` key already in processor_kwargs is kept.
+    if processor_cls is None and processor_kwargs is None:
+        # neither a class nor kwargs were given: use AutoProcessor
+        processor_cls = transformers.AutoProcessor
+    if processor_cls is not None:
+        processor_load_kwargs = dict(processor_kwargs or {})
+        if revision is not None:
+            processor_load_kwargs.setdefault('revision', revision)
+        processor = processor_cls.from_pretrained(
+            model_id, **processor_load_kwargs)
+        if padding_side is not None:
+            tokenizer = getattr(processor, 'tokenizer', processor)
+            tokenizer.padding_side = padding_side
+            if getattr(tokenizer, 'pad_token', None) is None:
+                eos = getattr(tokenizer, 'eos_token', None)
+                if eos is not None:
+                    tokenizer.pad_token = eos
+    else:
+        processor = None
+
+    # Model. ``dtype`` replaces any ``torch_dtype`` in model_kwargs.
+    model_load_kwargs = dict(model_kwargs or {})
+    if dtype is not None:
+        model_load_kwargs['torch_dtype'] = dtype
+    if revision is not None:
+        model_load_kwargs.setdefault('revision', revision)
+    model = model_cls.from_pretrained(model_id, **model_load_kwargs)
+    if move_to_device:
+        model = model.to(resolved_device)
+        model.eval()
+
+    return processor, model, resolved_device
+
+
+def load_hf_pipeline(
+        task: str,
+        model_id: str,
+        device: Optional[Union[str, int]] = None,
+        revision: Optional[str] = None,
+        model_kwargs: Optional[dict] = None,
+        pipeline_kwargs: Optional[dict] = None,
+) -> Tuple[Any, Union[str, int]]:
+    """
+    Build a HuggingFace :func:`transformers.pipeline` for ``task`` and
+    return it together with the device it was given. Covers the
+    device / revision / kwargs-forwarding boilerplate shared by
+    pipeline-backed CLAMS apps (ASR, NER, text classification,
+    zero-shot, etc.). Prefer :func:`load_hf_model` when the app needs
+    raw access to the underlying model / processor (e.g., for custom
+    chat-template formatting or batched ``generate`` calls).
+
+    :param task: pipeline task string forwarded to
+        :func:`transformers.pipeline` (e.g.,
+        ``"automatic-speech-recognition"``, ``"token-classification"``).
+    :param model_id: HuggingFace model identifier (Hub repo name or
+        local path) forwarded to ``pipeline(model=...)``.
+    :param device: target device, passed through to ``pipeline`` as
+        given: the string form (``'cuda'``, ``'cpu'``, ``'cuda:0'``)
+        for parity with :func:`load_hf_model`, or the integer form
+        (``-1`` for CPU, ``0+`` for GPU index). When ``None``
+        (default), ``'cuda'`` is used if ``torch.cuda.is_available()``
+        and ``'cpu'`` otherwise.
+    :param revision: optional Git revision (commit hash, branch, or
+        tag) on the Hub to pin the download to. Strongly recommended
+        for production; see :func:`load_hf_model` for rationale.
+    :param model_kwargs: extra kwargs forwarded to the underlying
+        ``model.from_pretrained()`` via the
+        ``pipeline(model_kwargs={...})`` channel.
+    :param pipeline_kwargs: extra kwargs forwarded directly to
+        :func:`transformers.pipeline` (e.g. ``generate_kwargs``,
+        ``tokenizer``, ``feature_extractor``, ``batch_size``,
+        ``framework``). ``task``, ``model`` and ``device`` entries are
+        always dropped; ``model_kwargs`` and ``revision`` entries are
+        replaced when the corresponding helper argument is set.
+    :returns: ``(pipeline, device)`` tuple. ``device`` is the value
+        handed to ``pipeline``: the argument as passed, or the
+        auto-resolved string when ``device=None``.
+    :rtype: Tuple[Any, Union[str, int]]
+    :raises ImportError: if ``torch`` or ``transformers`` is not
+        installed. Install the ``[hf]`` extra to fix.
+    """
+    try:
+        import torch  # pytype: disable=import-error
+    except ImportError as err:
+        raise ImportError(
+            "clams.backends.hf requires the `torch` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from err
+    try:
+        from transformers import pipeline  # pytype: disable=import-error
+    except ImportError as err:
+        raise ImportError(
+            "clams.backends.hf requires the `transformers` package. "
+            "Install with: pip install clams-python[hf]"
+        ) from err
+
+    # An explicit device is passed through untouched; else probe CUDA.
+    if device is not None:
+        target = device
+    elif torch.cuda.is_available():
+        target = 'cuda'
+    else:
+        target = 'cpu'
+
+    # Helper-owned keys never pass through from pipeline_kwargs.
+    call_kwargs = {
+        key: value for key, value in dict(pipeline_kwargs or {}).items()
+        if key not in ('task', 'model', 'device')
+    }
+    if model_kwargs:
+        call_kwargs['model_kwargs'] = dict(model_kwargs)
+    if revision is not None:
+        call_kwargs['revision'] = revision
+
+    pipe = pipeline(task, model=model_id, device=target, **call_kwargs)
+    return pipe, target
diff --git a/clams/develop/__init__.py b/clams/develop/__init__.py
index 4925780..5cbd84f 100644
--- a/clams/develop/__init__.py
+++ b/clams/develop/__init__.py
@@ -18,7 +18,13 @@
         'description': 'GtiHub Actions workflow files specific to `clamsproject` GitHub organization',
         'sourcedir': 'gha',
         'targetdir': '.github',
-    }
+    },
+    'utl-tf': {
+        'description': 'Local helper module for iterating TimeFrames and collecting per-TF frame tasks '
+                       '(baked into ``utils/timeframe.py``; backend-agnostic, safe to edit/delete)',
+        'sourcedir': 'utl-tf',
+        'targetdir': 'utils',
+    },
 }
 
 
@@ -65,12 +71,20 @@ def bake(self, update_level=0):
             if recipe == 'gha':
                 # There's nothing for devs to tweak GHA template, so first generation and updating are the same.
                 self.bake_gha(src_dir, dst_dir)
+            if recipe.startswith('utl-'):
+                # Utility recipes bake static helper modules; once baked the
+                # code is local to the app and devs are free to edit. No
+                # templating-variable substitution is needed -- pass an
+                # empty dict so ``safe_substitute`` is a no-op.
+                if dst_dir.exists() and update_level == 0:
+                    raise FileExistsError(f"  {dst_dir} already exists. Did you mean `--update`? ")
+                self.bake_app(src_dir, dst_dir, {})
             
     def bake_app(self, src_dir, dst_dir, templating_vars):
         for g in src_dir.glob("**/*.template"):
             r = g.relative_to(src_dir).parent
             f = g.with_suffix('').name
-            (dst_dir / r).mkdir(exist_ok=True)
+            (dst_dir / r).mkdir(parents=True, exist_ok=True)
             
             with open(g, 'r') as in_f, open(dst_dir/r/f, 'w') as out_f:
                 tmpl_to_compile = Template(in_f.read())
diff --git a/clams/develop/templates/app/app.py.template b/clams/develop/templates/app/app.py.template
index a7a4cc5..d2eb9b0 100644
--- a/clams/develop/templates/app/app.py.template
+++ b/clams/develop/templates/app/app.py.template
@@ -24,19 +24,64 @@ from mmif import Mmif, View, Annotation, Document, AnnotationTypes, DocumentType
 from lapps.discriminators import Uri
 
 
+# =============================================================================
+# Pick a base class for your app:
+#
+#   ClamsApp ............ default; the rest of this scaffold inherits from it.
+#                         Implement ``_annotate()``. That's it.
+#                         Choose for any non-LLM/VLM app: classical OCR /
+#                         ASR engines, classifiers, rule-based tools, etc.
+#
+#   ClamsPromptableApp .. for prompt-driven LLM/VLM/ALM/LMM apps wrapping a
+#                         non-HF backend (remote APIs like OpenAI/Anthropic,
+#                         vLLM, custom inference servers).
+#                         Implement: ``_annotate()`` + ``generate()``.
+#                         Import:
+#                             from clams.app import ClamsPromptableApp
+#                         Also in ``metadata.py``: uncomment the
+#                         ``inject_promptable_parameters`` block.
+#
+#   ClamsHFPromptableApp  for prompt-driven apps wrapping a local HuggingFace
+#                         ``transformers`` model (the typical VLM/LLM case).
+#                         Implement: ``_annotate()`` (call
+#                         ``self.load_model(parameters['model'])`` first) +
+#                         declare class attributes:
+#                             MODEL_CLS = <transformers.AutoModelFor...>
+#                             DTYPE = torch.bfloat16        # optional
+#                             PADDING_SIDE = 'left'          # optional
+#                         Import:
+#                             from clams.app import ClamsHFPromptableApp
+#                         Also in ``metadata.py``: set
+#                         ``analyzer_versions={<hf-id>: <commit-hash>, ...}``
+#                         on the ``AppMetadata`` call, and uncomment the
+#                         ``ClamsHFPromptableApp.inject_promptable_parameters``
+#                         block (the HF override of the plain helper).
+#                         Requires the ``[hf]`` extra:
+#                             pip install clams-python[hf]
+#                         Singleton ``analyzer_versions`` families pre-load
+#                         in ``__init__`` (warm start); multi-member
+#                         families load on the first ``load_model`` call
+#                         and cache thereafter. ``generate()``,
+#                         ``build_conversation``, and ``build_gen_kwargs``
+#                         have working defaults; override only for
+#                         model-specific quirks.
+#
+# See https://clams.ai/clams-python/app-baseclasses.html for the full
+# developer guide.
+# =============================================================================
 class $APP_CLASS_NAME(ClamsApp):
 
     def __init__(self):
         super().__init__()
 
     def _appmetadata(self):
-        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
+        # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
         # Also check out ``metadata.py`` in this directory.
         # When using the ``metadata.py`` leave this do-nothing "pass" method here.
         pass
 
     def _annotate(self, mmif: Mmif, **parameters) -> Mmif:
-        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate
+        # see https://clams.ai/clams-python/autodoc/clams.app.html#clams.app.ClamsApp._annotate
         raise NotImplementedError
 
 def get_app():
diff --git a/clams/develop/templates/app/metadata.py.template b/clams/develop/templates/app/metadata.py.template
index 93aec79..2de03a5 100644
--- a/clams/develop/templates/app/metadata.py.template
+++ b/clams/develop/templates/app/metadata.py.template
@@ -16,8 +16,8 @@ def appmetadata() -> AppMetadata:
     """
     Function to set app-metadata values and return it as an ``AppMetadata`` obj.
     Read these documentations before changing the code below
-    - https://sdk.clams.ai/appmetadata.html metadata specification.
-    - https://sdk.clams.ai/autodoc/clams.appmetadata.html python API
+    - https://clams.ai/clams-python/appmetadata.html metadata specification.
+    - https://clams.ai/clams-python/autodoc/clams.appmetadata.html python API
     
     :return: AppMetadata object holding all necessary information.
     """
@@ -51,7 +51,48 @@ def appmetadata() -> AppMetadata:
     metadata.add_parameter(name='a_param', description='example parameter description',
                            type='boolean', default='false')
     # metadta.add_parameter(more...)
-    
+
+    # If your app subclasses ``ClamsPromptableApp`` (a prompt-driven LLM/VLM/audio-LM
+    # app on a non-HF backend), uncomment the following two lines to add the
+    # SDK-managed promptable parameters (prompt, systemPrompt, temperature,
+    # maxNewTokens, etc.) to your app's metadata. See
+    # https://clams.ai/clams-python/app-baseclasses.html#promptable for the
+    # developer guide. Reminder: these parameter names are reserved by the SDK;
+    # do not redeclare any of them above.
+    # from clams.app import ClamsPromptableApp
+    # ClamsPromptableApp.inject_promptable_parameters(metadata)
+    #
+    # If your app subclasses ``ClamsHFPromptableApp`` (HF transformers backend),
+    # use the HF override of the same helper -- it injects the promptable
+    # parameters AND a ``model`` parameter derived from ``analyzer_versions``.
+    # Also set ``analyzer_versions={<hf-id>: <commit-hash>, ...}`` on the
+    # ``AppMetadata(...)`` call above (replaces the singular
+    # ``analyzer_version`` for HF apps). See
+    # https://clams.ai/clams-python/app-baseclasses.html#hf-promptable for details.
+    # from clams.app import ClamsHFPromptableApp
+    # ClamsHFPromptableApp.inject_promptable_parameters(metadata)
+    #
+    # To customize the default value of any promptable parameter (e.g. provide an
+    # app-specific default ``prompt``, raise ``maxNewTokens``, pin ``parallelPrompts``,
+    # etc.), mutate the ``default`` field on the already-injected parameter
+    # object; the SDK does NOT allow re-declaring promptable param names. See
+    # https://clams.ai/clams-python/app-baseclasses.html#promptable-customizing-defaults
+    # for details. Example:
+    # for p in metadata.parameters:
+    #     if p.name == 'prompt':
+    #         p.default = ['Describe what is in this image.']
+    #     elif p.name == 'maxNewTokens':
+    #         p.default = 2048
+    #
+    # HF-only: the ``model`` parameter the HF helper injects gets its ``default``
+    # auto-set to the only key when ``analyzer_versions`` has a single entry
+    # (singleton family); for multi-member families the default is ``None`` and
+    # the caller MUST pass ``model=...`` on every request. To provide a
+    # recommended pick instead, mutate ``default`` the same way:
+    # for p in metadata.parameters:
+    #     if p.name == 'model':
+    #         p.default = '<org>/<one-of-the-keys-from-analyzer_versions>'
+
     # CHANGE this line and make sure return the compiled `metadata` instance
     return None
 
diff --git a/clams/develop/templates/utl-tf/__init__.py.template b/clams/develop/templates/utl-tf/__init__.py.template
new file mode 100644
index 0000000..e69de29
diff --git a/clams/develop/templates/utl-tf/timeframe.py.template b/clams/develop/templates/utl-tf/timeframe.py.template
new file mode 100644
index 0000000..d2c8d7b
--- /dev/null
+++ b/clams/develop/templates/utl-tf/timeframe.py.template
@@ -0,0 +1,154 @@
+"""
+TimeFrame iteration / frame-sampling helpers, local to this app.
+
+Generated by ``clams develop -r utl-tf``. The code in this file is part
+of your app, not the SDK; edit it freely, refactor as needed, or delete
+the whole file if your app does not iterate TimeFrame annotations.
+
+The functions here factor out the canonical pattern that any CLAMS app
+processing video by TimeFrames tends to write:
+
+  1. iterate TimeFrame annotations across input views, optionally
+     filtered by label
+  2. sample frames per TF using the universal ``tfSamplingMode``
+     parameter (representative TimePoints, the middle representative,
+     or every target / native-FPS frame)
+  3. when ``vdh`` returns a fallback timestamp (milliseconds, no
+     existing TP behind it), mint a fresh ``TimePoint`` annotation in
+     the app's new view so downstream code has a stable anchor id
+  4. assemble per-TF task tuples that downstream batching /
+     inference / annotation code can consume uniformly
+
+The helpers are backend-agnostic: tasks can feed a HuggingFace VLM, a
+remote LLM API, a classical CV pipeline, or any other per-frame
+processor. They have no dependency on ``clams.app.ClamsPromptableApp``
+or any other promptable / inference machinery.
+
+These functions are scaffolded into each app so individual apps can
+edit them freely while the pattern stabilizes across the ecosystem.
+Once the shape converges across several apps, the helpers are good
+candidates for promotion into a shared package -- either
+``mmif.utils`` (for the pure-MMIF iteration / TP minting pieces, which
+have no clams-app dependency) or ``clams.<helpers>`` (for the
+task-tuple composition that does presuppose the "writing into a new
+view" CLAMS-app idiom). If/when that happens, apps would import the
+shared version and delete this local copy.
+"""
+from typing import Any, Iterator, List, Optional, Tuple, Union
+
+from mmif import Annotation, Document, Mmif, View, AnnotationTypes
+from mmif.utils import video_document_helper as vdh
+
+
+def iter_timeframes(
+        mmif: Mmif, tflabels_of_interest: List[str],
+) -> Iterator[Annotation]:
+    """
+    Yield every TimeFrame annotation in ``mmif``, filtered by
+    ``tflabels_of_interest`` when non-empty.
+
+    :param mmif: the input MMIF object.
+    :param tflabels_of_interest: when non-empty, only TFs whose
+        ``label`` property matches one of these are yielded. An
+        empty list (the default in most apps) yields every TF
+        regardless of label.
+    """
+    for view in mmif.get_all_views_contain(AnnotationTypes.TimeFrame):
+        for tf in view.get_annotations(AnnotationTypes.TimeFrame):
+            if (tflabels_of_interest
+                    and tf.get_property('label') not in tflabels_of_interest):
+                continue
+            yield tf
+
+
+def to_timepoints(
+        parent_view: View,
+        video_doc: Document,
+        sources: List[Union[str, int]],
+) -> List[str]:
+    """
+    Normalize a list of frame ``sources`` (as returned by
+    :func:`vdh.extract_images_by_mode_with_sources`) into a parallel
+    list of TimePoint ``id``\\ s.
+
+    Each ``source`` is either:
+
+    * ``str`` -- the id of an existing TimePoint annotation
+      (representative / target). Passed through unchanged.
+    * ``int`` -- a millisecond timestamp returned by ``vdh`` for the
+      interval-fallback case (``tfSamplingMode=single`` with no
+      representatives, or ``tfSamplingMode=all`` with no targets).
+      A fresh ``TimePoint`` annotation is minted in ``parent_view``
+      at this timestamp; the new annotation's id is returned.
+
+    The ``TimePoint`` type is registered with
+    ``parent_view.new_contain()`` lazily on the first mint, so apps
+    that never hit the fallback path do not get an empty
+    ``TimePoint`` entry in their view metadata.
+
+    :param parent_view: the view this app is writing into; receives
+        any freshly-minted TimePoints.
+    :param video_doc: the source VideoDocument; recorded as
+        ``document`` on each minted TimePoint.
+    :param sources: per-frame source identifiers from ``vdh``.
+    :return: a list of TimePoint ids, parallel to ``sources``.
+    """
+    tp_contain_registered = False
+    out: List[str] = []
+    for src in sources:
+        if isinstance(src, str):
+            out.append(src)
+        else:
+            if not tp_contain_registered:
+                parent_view.new_contain(AnnotationTypes.TimePoint)
+                tp_contain_registered = True
+            tp = parent_view.new_annotation(
+                AnnotationTypes.TimePoint,
+                document=video_doc.id,
+                timePoint=int(src),
+                timeUnit='milliseconds',
+            )
+            out.append(tp.id)
+    return out
+
+
+def collect_timeframes_of_interest(
+        mmif: Mmif,
+        parent_view: View,
+        video_doc: Document,
+        tflabels_of_interest: List[str],
+) -> List[Tuple[List[Any], List[str], str, Optional[str]]]:
+    """
+    Convenience composition of :func:`iter_timeframes`,
+    :func:`vdh.extract_images_by_mode_with_sources`, and
+    :func:`to_timepoints`. Returns one
+    ``(images, tp_ids, tf_id, tf_label)`` task per matching TimeFrame
+    that produced at least one sampled frame.
+
+    Each task's ``images`` and ``tp_ids`` are parallel lists -- one
+    entry per frame sampled from that TF (length 1 for
+    ``tfSamplingMode=single``, N for ``representatives`` / ``all``).
+    Each entry of ``tp_ids`` is either the id of an existing
+    TimePoint or the id of a freshly-minted one (see
+    :func:`to_timepoints`). ``tf_label`` is the source TimeFrame's
+    ``label`` property value, or ``None`` if unset.
+
+    :param mmif: the input MMIF.
+    :param parent_view: the view this app is writing into.
+    :param video_doc: the source VideoDocument that frames are
+        extracted from.
+    :param tflabels_of_interest: optional label filter; empty list =
+        no filter.
+    :return: per-TF task tuples, ready to feed a batched inference
+        loop or any other per-frame processor.
+    """
+    tasks: List[Tuple[List[Any], List[str], str, Optional[str]]] = []
+    for tf in iter_timeframes(mmif, tflabels_of_interest):
+        images, sources = vdh.extract_images_by_mode_with_sources(
+            mmif, tf, as_PIL=True)
+        if not images:
+            continue
+        tp_ids = to_timepoints(parent_view, video_doc, sources)
+        tf_label = tf.get_property('label')
+        tasks.append((list(images), tp_ids, tf.id, tf_label))
+    return tasks
diff --git a/documentation/app-baseclasses.rst b/documentation/app-baseclasses.rst
new file mode 100644
index 0000000..6ac5cfc
--- /dev/null
+++ b/documentation/app-baseclasses.rst
@@ -0,0 +1,521 @@
+.. _app-baseclasses:
+
+Specialized App Base Classes
+============================
+
+Beyond the bare-minimum :class:`~clams.app.ClamsApp` introduced in
+:ref:`introduction`, the SDK provides specialized base classes that capture
+common structural patterns for CLAMS apps. Each specialized base class
+extends :class:`~clams.app.ClamsApp` with a standardized runtime parameter
+surface and helper methods appropriate to its category of app. App
+developers inherit from the specialized base class that best matches what
+their app does, instead of inheriting from :class:`~clams.app.ClamsApp`
+directly.
+
+This page first recaps what every CLAMS app inherits from
+:class:`~clams.app.ClamsApp` (the baseline), then documents each
+specialized base class and what it adds on top.
+
+.. _app-baseline:
+
+What every CLAMS app inherits
+-----------------------------
+
+Every CLAMS app subclasses :class:`~clams.app.ClamsApp` (directly or via
+a specialized base class such as :class:`~clams.app.ClamsPromptableApp`)
+and inherits its baseline behaviors: parameter casting and refinement,
+view signing, JSON envelope unwrapping, CUDA memory profiling and
+cleanup, error views, and a set of *universal* runtime parameters that
+the SDK auto-injects into every app's metadata.
+
+Universal parameters
+^^^^^^^^^^^^^^^^^^^^
+
+These are added automatically by :meth:`~clams.app.ClamsApp.__init__`
+at runtime and by the standard ``metadata.py`` template's ``__main__``
+block at ``python metadata.py`` time. App developers do not declare them.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 18 12 18 8 44
+
+   * - Name
+     - Type
+     - Default
+     - Multi-valued
+     - Notes
+   * - ``pretty``
+     - boolean
+     - ``false``
+     - no
+     - When ``true``, the response MMIF JSON is re-formatted with
+       2-space indentation.
+   * - ``runningTime``
+     - boolean
+     - ``true``
+     - no
+     - When ``true``, the running time of the request is recorded in
+       the view metadata.
+   * - ``hwFetch``
+     - boolean
+     - ``false``
+     - no
+     - When ``true``, host hardware info (architecture, GPU and vRAM)
+       is recorded in the view metadata.
+   * - ``tfSamplingMode``
+     - string
+     - ``'representatives'``
+     - no
+     - For apps that process ``TimeFrame`` annotations: how to sample
+       frames within each TimeFrame. Choices: ``'representatives'``,
+       ``'single'``, ``'all'``. No effect on apps that do not process
+       TimeFrames.
+
+.. _sdk-managed-reserved:
+
+SDK-managed parameter names are reserved
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Parameter names added by the SDK (the universal parameters listed
+above, plus any parameters added by a specialized base class) are
+reserved. An app's ``appmetadata()`` MUST NOT declare any of these
+names via :meth:`~clams.appmetadata.AppMetadata.add_parameter` directly; doing so trips
+the existing duplicate-name ``ValueError`` when the SDK tries to add
+its own spec.
+
+This reservation guarantees a uniform, predictable parameter interface
+across all CLAMS apps. App developers can still customize a reserved
+parameter's *default value* (but not its ``type``, ``multivalued``, or
+``choices``) by mutating the ``default`` field on the already-injected
+parameter object; see :ref:`promptable-customizing-defaults` for a
+worked example.
+
+.. _promptable:
+
+Promptable CLAMS Apps
+---------------------
+
+A **promptable app** is a CLAMS app that wraps a promptable model: a large
+language model (LLM), vision-language model (VLM), audio-language model
+(ALM), large multimodal model (LMM), or remote generative API. The SDK
+provides :class:`~clams.app.ClamsPromptableApp` as a specialized base class
+for these apps. It standardizes the runtime parameter surface (prompts,
+generation hyperparameters, batch size) and provides helpers for building
+chat conversations and persisting model responses into MMIF.
+
+This section is the developer guide for writing or migrating a CLAMS app
+that inherits from :class:`~clams.app.ClamsPromptableApp`. For the general
+CLAMS app development pattern, see the :ref:`introduction`,
+:ref:`tutorial`, and :ref:`runtime-params` pages.
+
+When to use ``ClamsPromptableApp``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Choose :class:`~clams.app.ClamsPromptableApp` over :class:`~clams.app.ClamsApp`
+when your app's core operation is "given a prompt and some input
+(image/audio/text/structured data), return generated text." Concretely:
+
+- Image captioning, VLM-based OCR, scene description
+- Audio captioning, transcription via ALMs
+- Summarization, classification, structured-data extraction via LLMs
+- Tasks driven by an LMM that takes mixed-modality inputs
+- Any app that wraps a remote LLM, VLM, ALM, or LMM API and forwards a prompt
+
+If your app does not call a generative model (e.g. a classical OCR engine,
+a speech-to-text engine that doesn't take prompts, a classifier wrapping a
+discriminative model), keep using :class:`~clams.app.ClamsApp` directly.
+
+.. note::
+
+   ``ClamsPromptableApp`` assumes an **instruction- or chat-tuned**
+   model with a system/user/assistant role structure. Bare completion
+   / next-token-prediction base models do not fit this base class
+   cleanly; use :class:`~clams.app.ClamsApp` directly for those.
+
+Standardized runtime parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Every :class:`~clams.app.ClamsPromptableApp` exposes the following
+SDK-managed runtime parameters in addition to the universal parameters
+from :class:`~clams.app.ClamsApp`. These names are reserved; see
+:ref:`sdk-managed-reserved`.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 18 12 18 8 44
+
+   * - Name
+     - Type
+     - Default
+     - Multi-valued
+     - Notes
+   * - ``prompt``
+     - string
+     - *(required, no default)*
+     - yes
+     - User prompt(s) sent to the model. A single value runs as a one-shot
+       generation. A multi-value list is interpreted as a multi-turn static
+       prompt; see :ref:`promptable-multiturn`.
+   * - ``systemPrompt``
+     - string
+     - ``''``
+     - no
+     - Optional system-role text prepended to the conversation.
+   * - ``promptMode``
+     - string
+     - ``'turn-taking'``
+     - no
+     - How to interpret a multi-value ``prompt`` list. Choices:
+       ``'turn-taking'`` or ``'user-only'``. See :ref:`promptable-multiturn`.
+   * - ``maxNewTokens``
+     - integer
+     - ``512``
+     - no
+     - Maximum number of new tokens generated per inference call. Larger values
+       grow the KV cache linearly and add to GPU memory usage; reduce if VRAM
+       is constrained.
+   * - ``temperature``
+     - number
+     - ``0.0``
+     - no
+     - Sampling temperature. ``0.0`` selects deterministic / greedy decoding
+       for maximum reproducibility; override for sampled generation.
+   * - ``topP``
+     - number
+     - ``1.0``
+     - no
+     - Nucleus-sampling cumulative probability cutoff. Only meaningful when
+       ``temperature`` > 0.
+   * - ``topK``
+     - integer
+     - ``50``
+     - no
+     - Top-K sampling cutoff. Only meaningful when ``temperature`` > 0.
+   * - ``parallelPrompts``
+     - integer
+     - ``1``
+     - no
+     - Number of independent prompts the app stacks into a single
+       forward pass. Per-prompt content size is the app's
+       responsibility; prompt count and per-prompt size combine
+       multiplicatively for GPU memory. Keep at ``1`` on memory-tight
+       setups; see the parameter's own description in
+       :py:attr:`~clams.app.ClamsPromptableApp.promptable_parameters`
+       for an OOM-risk example.
+
+.. _promptable-customizing-defaults:
+
+Customizing default values
+""""""""""""""""""""""""""
+
+The SDK ships sensible defaults for most promptable parameters but
+deliberately leaves ``prompt`` **without** a default; prompts are
+inherently app-specific and no single value is right for all apps.
+Beyond ``prompt``, other defaults may also be inappropriate for a given
+app: a model that needs longer outputs wants a higher ``maxNewTokens``,
+a small-VRAM deployment wants ``parallelPrompts`` pinned at ``1``, etc.
+
+Because the reservation rule prevents calling
+``metadata.add_parameter('prompt', ...)`` (or any other promptable name)
+directly, the recommended pattern for customizing defaults is to mutate
+the ``default`` field on the already-injected parameter object right
+after calling :meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters`.
+You'll see a worked example of this in the ``metadata.py`` generated
+by the ``clams develop`` scaffold.
+
+This works for any promptable parameter. The parameter spec itself
+(``type``, ``multivalued``, ``choices``) stays locked by the SDK; only
+the ``default`` field is meant to be mutated this way, which preserves
+the cross-app uniformity that the reservation rule is designed to
+guarantee.
+
+If an app *wants* to require callers to pass a value explicitly, what
+to do depends on the parameter. For ``prompt``, simply leave it as
+injected: it already has no default, so the SDK will raise a
+"required parameter" error if the caller omits it. For other params,
+deleting the SDK default and leaving it ``None`` would have the same
+effect, though that's rarely useful.
+
+.. _promptable-declaration:
+
+Declaring a promptable app
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A promptable app requires two paired edits relative to the scaffold
+generated by ``clams develop``:
+
+1. In ``app.py``, change the app class's base from
+   :class:`~clams.app.ClamsApp` to
+   :class:`~clams.app.ClamsPromptableApp` and implement
+   :meth:`~clams.app.ClamsPromptableApp.generate`. The scaffold file
+   already contains a guiding comment at the class declaration line.
+2. In ``metadata.py``, call
+   :meth:`ClamsPromptableApp.inject_promptable_parameters
+   <clams.app.ClamsPromptableApp.inject_promptable_parameters>` at
+   the end of ``appmetadata()``. The scaffold file already contains
+   a commented-out helper-call block; uncomment it.
+
+The ``__main__`` block in ``metadata.py`` is unchanged from
+non-promptable apps. The helper call inside ``appmetadata()`` makes
+the promptable parameters visible to both ``python metadata.py``
+(build-time discovery) and to
+:meth:`~clams.app.ClamsApp._load_appmetadata` (runtime). The base
+class change ensures the app inherits the parameter-presence
+validation, the ``generate()`` contract, and the helper methods at
+runtime.
+
+The ``generate()`` contract
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Subclasses of :class:`~clams.app.ClamsPromptableApp` that wrap a backend
+without a default SDK implementation (e.g., remote-API or custom local
+backends) MUST implement :meth:`~clams.app.ClamsPromptableApp.generate`.
+Subclasses of :class:`~clams.app.ClamsHFPromptableApp` inherit a concrete
+``generate()`` and do not need to override it. See the method's docstring
+for the full signature, batch semantics, and return value.
+
+Keep inference logic inside ``generate()`` distinct from MMIF I/O; the
+latter belongs in ``_annotate()`` (which calls ``self.generate()``).
+This separation lets HF-backed apps inherit the default ``generate()``
+without restating backend mechanics, and lets non-HF apps swap in a new
+``generate()`` without rewriting their MMIF I/O.
+
+.. _promptable-multiturn:
+
+Multi-turn handling (``promptMode``)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``prompt`` is always a ``List[str]`` after parameter casting. When the
+list has a single element, ``promptMode`` is irrelevant (single-shot
+generation). When the list has multiple elements, ``promptMode`` selects
+between two multi-element prompting strategies:
+
+**Turn-taking** (default). The list is interpreted as an alternating
+user/assistant conversation: even indices (0, 2, 4, ...) are user turns,
+odd indices are assistant turns. The full conversation is sent to the
+model in a single ``generate`` call. This mode supports any pattern
+that fits an alternating role structure, including few-shot in-context
+learning (where the (user, assistant) pairs are task exemplars and the
+final user turn is the new query), multi-turn dialogue continuation,
+and role-play scaffolding. Example (few-shot ICL): ``["Classify
+sentiment: 'I love this.'", "positive", "Classify sentiment: 'I hate
+this.'", "negative", "Classify sentiment: 'It's okay.'"]``: two
+exemplar pairs followed by a final query; one inference returns the
+final reply.
+
+**User-only**. Every element is a user turn; the model generates an
+assistant reply between each, in N successive ``generate`` calls. Only
+the final assistant response is returned per input item. This mode
+implements iterative / scripted multi-step prompting, a manual,
+externally-driven scaffold for stepwise reasoning. (It is distinct
+from in-model zero-shot chain-of-thought, where stepwise reasoning is
+elicited inside a single inference call by a prompt like "let's think
+step by step"; here, the user-side scaffolding makes the steps
+explicit and feeds each intermediate model output back as context for
+the next user turn.) Example (scripted multi-step reasoning):
+``["Step 1: identify objects.", "Step 2: describe relationships.",
+"Step 3: conclude."]``: three sequential user prompts, three
+inferences, final reply returned.
+
+``turn-taking`` is the default because it costs a single inference call
+and is the more common multi-element pattern.
+
+Helpers
+^^^^^^^
+
+:meth:`~clams.app.ClamsPromptableApp.inject_promptable_parameters`
+    A static method called from your app's ``appmetadata()`` (in
+    ``metadata.py``) to add the SDK-managed promptable parameters.
+
+:meth:`~clams.app.ClamsPromptableApp.build_conversation`
+    Instance method that constructs a chat-template-compatible message
+    list (or a ``List[List[dict]]`` of progressively-extending prefixes
+    for ``user-only`` mode). Handles string and list prompt forms, the
+    two ``promptMode`` semantics, the optional ``systemPrompt``, and
+    inlines ``images`` / ``audios`` into the (final) user turn. Accepts
+    a pre-built ``List[dict]`` and returns it unchanged. Subclasses
+    may override to access model-specific state (e.g.
+    ``self.processor``) when formatting messages.
+
+:meth:`~clams.app.ClamsPromptableApp.response_to_grounded_textdocument`
+    Writes a ``TextDocument`` plus an ``Alignment`` (``source -> TD``)
+    into a view. ``source`` is the coarse cross-modal anchor; the
+    optional ``origins`` (paired with ``origination``) is the finer
+    derivation list, written to the TD's ``origins`` / ``origination``
+    properties. See https://clams.ai/clams-vocabulary/Document for
+    vocabulary semantics.
+
+.. _hf-promptable:
+
+HuggingFace Promptable Apps
+---------------------------
+
+For the very common case of "promptable CLAMS app + local HuggingFace
+``transformers`` model," the SDK provides
+:class:`~clams.app.ClamsHFPromptableApp`, a specialized subclass of
+:class:`~clams.app.ClamsPromptableApp` that absorbs all HF-specific
+inference boilerplate. Concrete apps inheriting from it declare the
+model via a few class attributes and typically only need to implement
+``_annotate()`` for their MMIF I/O.
+
+When to use
+^^^^^^^^^^^
+
+Choose :class:`~clams.app.ClamsHFPromptableApp` over plain
+:class:`~clams.app.ClamsPromptableApp` when your app:
+
+- wraps a local HuggingFace ``transformers`` model loadable via
+  ``from_pretrained()``, AND
+- runs the standard chat-template -> ``model.generate`` ->
+  ``batch_decode`` inference pipeline (every modern instruct-tuned
+  VLM/LLM in HF), AND
+- doesn't need bespoke pixel-value preprocessing or vision-token
+  stitching at inference time.
+
+If your app uses a remote API instead (OpenAI, Anthropic, etc.), or a
+non-HF local backend, inherit from
+:class:`~clams.app.ClamsPromptableApp` directly and implement
+:meth:`~clams.app.ClamsPromptableApp.generate` yourself.
+
+.. _hf-promptable-declaring:
+
+Declaring an HF promptable app
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+On top of the baseline declaration shared by every promptable app
+(see :ref:`promptable-declaration`), a
+:class:`~clams.app.ClamsHFPromptableApp` subclass:
+
+1. Uses :class:`~clams.app.ClamsHFPromptableApp` (not
+   :class:`~clams.app.ClamsPromptableApp`) as the base class in
+   ``app.py``.
+2. Declares the required class attribute ``MODEL_CLS`` and any
+   optional dtype / padding / kwargs hints (see
+   :ref:`hf-promptable-class-attrs` for the full list).
+3. Sets ``analyzer_versions={<hf-id>: <commit-hash>, ...}`` on the
+   ``AppMetadata`` constructor call in ``metadata.py`` (replaces the
+   singular ``analyzer_version`` for HF apps).
+4. Calls
+   :meth:`ClamsHFPromptableApp.inject_promptable_parameters
+   <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>`
+   (the HF override of the plain helper) at the end of
+   ``appmetadata()``. The scaffold ``metadata.py`` contains a
+   commented-out HF block; uncomment it.
+5. Inherits the base class's
+   :meth:`~clams.app.ClamsHFPromptableApp.generate` implementation;
+   no override needed.
+
+For a minimal worked example, see the class docstring on
+:class:`~clams.app.ClamsHFPromptableApp`.
+
+.. _hf-promptable-class-attrs:
+
+Class-attribute hooks
+^^^^^^^^^^^^^^^^^^^^^
+
+Concrete subclasses declare the model class plus optional dtype /
+padding hints via class attributes, and declare the family of
+supported model variants (with pinned commits) via
+``analyzer_versions`` in ``metadata.py``:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 22 60 18
+
+   * - Attribute
+     - Meaning
+     - Required
+   * - ``MODEL_CLS``
+     - ``transformers`` model class (e.g.
+       :class:`~transformers.AutoModelForImageTextToText`,
+       :class:`~transformers.AutoModelForCausalLM`).
+     - yes
+   * - ``PROCESSOR_CLS``
+     - Processor / tokenizer / feature-extractor class. Defaults to
+       :class:`~transformers.AutoProcessor`.
+     - no
+   * - ``DTYPE``
+     - Torch dtype for the model and for ``pixel_values`` casting in
+       :py:meth:`~clams.app.ClamsHFPromptableApp.generate`. E.g.
+       ``torch.bfloat16`` for low-precision LLM inference.
+     - no
+   * - ``PADDING_SIDE``
+     - Tokenizer padding side. ``'left'`` for decoder-only batched
+       generation; leave unset otherwise.
+     - no
+   * - ``MODEL_KWARGS`` / ``PROCESSOR_KWARGS``
+     - Extra kwargs forwarded to the respective
+       ``from_pretrained()`` calls (e.g.
+       ``trust_remote_code=True``).
+     - no
+
+The HF model identifiers themselves are NOT class attributes. They
+live in ``metadata.py`` as ``analyzer_versions``, a
+``Dict[str, str]`` mapping each supported model id to its pinned
+commit hash. The SDK auto-derives a ``model`` runtime parameter
+from this dict, with ``choices`` set to the dict keys.
+
+Family / singleton handling
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When ``analyzer_versions`` contains a single entry (the typical
+single-model app), the SDK eagerly pre-loads that one model in
+``__init__`` and sets ``model.default`` to the only key so callers
+can omit the parameter. Single-model apps thus preserve warm-start
+semantics: the model is loaded at app startup, not on first request.
+
+When ``analyzer_versions`` contains multiple entries (a family app),
+loading is deferred until the first :py:meth:`~clams.app.ClamsHFPromptableApp.load_model` call inside
+``_annotate``, and ``model`` is injected without a default; callers
+must pick a family member explicitly (or the dev mutates
+``model.default`` post-injection to provide a recommended pick).
+Loaded models are cached per ``(model_id, revision)`` for the
+lifetime of the app instance; switching models loads on first miss,
+cache-hits on repeat.
+
+Reproducibility: ``model`` refinement and view metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The user-facing ``model`` parameter accepts raw HF model ids
+(``org/repo-name``). The SDK's
+:py:meth:`~clams.app.ClamsHFPromptableApp._refine_params` expands the
+raw value to ``org/repo-name@<revision>`` form (using the dict
+lookup) during parameter refinement. The standard ``sign_view`` flow
+then stamps:
+
+- the **raw** user choice into ``view.metadata.parameters['model']``
+  (transparency: what the user typed),
+- the **resolved** ``org/repo-name@<revision>`` into
+  ``view.metadata.appConfiguration['model']`` (reproducibility: the
+  exact commit applied).
+
+A consumer of the output MMIF can read the resolved revision directly
+from the view metadata, with no cross-reference to the app metadata
+required.
+
+What the base class provides
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A subclass typically only writes ``_annotate()``. The base class
+supplies:
+
+* model loading and caching via
+  :py:meth:`~clams.app.ClamsHFPromptableApp.load_model`, which wraps
+  :func:`clams.backends.hf.load_hf_model` (non-promptable HF apps
+  can call that loader directly without going through this base
+  class);
+* the parameter injector
+  :py:meth:`ClamsHFPromptableApp.inject_promptable_parameters
+  <clams.app.ClamsHFPromptableApp.inject_promptable_parameters>`;
+* a concrete batched HF
+  :py:meth:`~clams.app.ClamsHFPromptableApp.generate`;
+* a default
+  :py:meth:`~clams.app.ClamsHFPromptableApp.build_gen_kwargs` that
+  maps the SDK promptable parameters to HF ``model.generate()``
+  kwargs.
+
+See each method's docstring for full details.
+
+Apps using the HF backend (with or without the promptable wrapper)
+must install the ``[hf]`` extra: ``pip install clams-python[hf]``.
+
diff --git a/documentation/index.rst b/documentation/index.rst
index 9f6db33..5882e11 100644
--- a/documentation/index.rst
+++ b/documentation/index.rst
@@ -16,6 +16,7 @@ CLAMS Python SDK
   introduction
   input-output
   runtime-params
+  app-baseclasses
   gpu-apps
   appmetadata
   appdirectory
diff --git a/documentation/introduction.rst b/documentation/introduction.rst
index ce907e8..96435c5 100644
--- a/documentation/introduction.rst
+++ b/documentation/introduction.rst
@@ -72,13 +72,16 @@ As a developer you can expose different behaviors of the ``annotate()`` method b
   These runtime configurations are not part of the MMIF input, but for reproducible analysis, you should record these configurations in the output MMIF. 
 
 .. note::
-  There are *universal* parameters defined at the SDK-level that all CLAMS apps commonly use. See :const:`clams.app.ClamsApp.universal_parameters`. 
+  Some runtime parameters are managed by the SDK itself rather than declared per-app. The *universal* parameters in :const:`clams.app.ClamsApp.universal_parameters` are one such set; they are auto-added to every CLAMS app. Specialized base classes (see below) add their own SDK-managed parameter sets on top.
 
 .. warning::
   All the runtime configurations should be pre-announced in the app metadata.
 
 Also see <:doc:`tutorial`> for a step-by-step tutorial on how to write the ``_annotate()`` method with a simple example NLP tool.
 
+.. note::
+  Inheriting from :class:`~clams.app.ClamsApp` directly works for any CLAMS app. For common app categories (e.g. apps wrapping LLMs or multimodal models), the SDK provides specialized base classes that extend :class:`~clams.app.ClamsApp` with additional SDK-managed parameter sets and helpers. See :ref:`app-baseclasses`.
+
 appmetadata()
 """""""""""""
 
diff --git a/documentation/modules.rst b/documentation/modules.rst
index 7897b4a..d25e3cd 100644
--- a/documentation/modules.rst
+++ b/documentation/modules.rst
@@ -7,5 +7,6 @@ API documentation
 
    autodoc/clams.app
    autodoc/clams.appmetadata
+   autodoc/clams.backends
    autodoc/clams.restify
    autodoc/clams.mmif_utils
diff --git a/documentation/runtime-params.rst b/documentation/runtime-params.rst
index 4d3bf93..146102e 100644
--- a/documentation/runtime-params.rst
+++ b/documentation/runtime-params.rst
@@ -190,6 +190,20 @@ For more complex value structures (e.g., comma-separated lists within values),
 the app developer is responsible for further parsing and should document the
 expected format in the parameter's ``description`` field.
 
+.. _runtime-params-promptable-note:
+
+Promptable apps: an extra SDK-managed parameter set
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For apps that wrap an **instruction- or chat-tuned** promptable model
+(an LLM or other multimodal model, local or remote), inherit from
+:class:`~clams.app.ClamsPromptableApp` instead of
+:class:`~clams.app.ClamsApp`. The promptable base class adds a
+standardized SDK-managed parameter set on top of the universal
+parameters; the names are reserved and are added via a single helper
+call inside ``appmetadata()``. See :ref:`promptable` for the full
+developer guide and parameter list.
+
 .. _runtime-params-envelope-note:
 
 Note on JSON envelope input
diff --git a/documentation/tutorial.md b/documentation/tutorial.md
index b7f7469..e5d2698 100644
--- a/documentation/tutorial.md
+++ b/documentation/tutorial.md
@@ -146,7 +146,7 @@ This means that if the user doesn't specify the value for these parameters at th
 If you want to make a parameter "optional" by providing a default value, you can do so by adding a `default` argument to the `add_parameter()` method.
 
 > **Note**
-> Also refer to [CLAMS App Metadata](https://sdk.clams.ai/appmetadata.html) for more details regarding what fields need to be specified.
+> Also refer to [CLAMS App Metadata](https://clams.ai/clams-python/appmetadata.html) for more details regarding what fields need to be specified.
 
 #### `_annotate()`
 The `_annotate()` method should accept a MMIF file/string/object as its first parameter and always returns a `MMIF` object with an additional `view` containing annotation results. This is where the bulk of your logic will go. For a text processing app, it is mostly concerned with finding text documents, calling the code that runs over the text, creating new views and inserting the results. 
@@ -228,20 +228,20 @@ First, with `text_value` we get the text from the text document, either from its
 
 ## Working with TimeFrame Annotations
 
-Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets` — a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries.
+Many CLAMS apps process video by operating on TimeFrame annotations produced by an upstream app (e.g., scene detection, shot segmentation). A TimeFrame can carry structural members (currently called `targets`, a list of TimePoint IDs covering every frame in the segment), a salient subset of those members (currently called `representatives`), or simply `start`/`end` boundaries.
 
 > **Note**
 > The property names `targets` and `representatives` are under review and may be renamed in a future MMIF spec version. See [mmif#238](https://github.com/clamsproject/mmif/issues/238) for the ongoing discussion. The SDK API will be updated accordingly.
 
 ### Frame sampling with `tfSamplingMode`
 
-When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter** — automatically available on every CLAMS app without any per-app configuration.
+When your app receives TimeFrame annotations, the caller can control which frames your app processes by setting the `tfSamplingMode` runtime parameter. This is a **universal parameter**: it is automatically available on every CLAMS app without any per-app configuration.
 
 There are three modes:
 
-- `representatives` (default) — use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped.
-- `single` — pick one frame: the middle representative if available, otherwise the midpoint of the start/end interval.
-- `all` — use every frame in `targets` if present, otherwise generate every frame in the start/end interval.
+- `representatives` (default): use the frames listed in the TimeFrame's `representatives` property. If no representatives exist, the TimeFrame is skipped.
+- `single`: pick one frame, namely the middle representative if available, otherwise the midpoint of the start/end interval.
+- `all`: use every frame in `targets` if present, otherwise generate every frame in the start/end interval.
 
 App developers do **not** need to handle this parameter themselves. The SDK intercepts it in `annotate()` and sets a context variable before `_annotate()` runs. Inside `_annotate()`, calls to `vdh.extract_frames_by_mode()` automatically read the active mode and select frames accordingly. The underlying per-mode functions (`_sample_representatives()`, `_sample_single()`, `_sample_all()`) in `mmif.utils.video_document_helper` are also available for apps that need frame numbers without extracting images.
 
diff --git a/pyproject.toml b/pyproject.toml
index 1b3fc4e..048410d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [
     "Programming Language :: Python :: 3 :: Only",
 ]
 dependencies = [
-    "mmif-python==1.4.0",
+    "mmif-python==1.5.0",
     "Flask>=2",
     "Flask-RESTful>=0.3.9",
     "gunicorn>=20",
@@ -39,6 +39,9 @@ source = "https://github.com/clamsproject/clams-python"
 dev = ["pytype", "pytest", "pytest-cov", "pillow", "setuptools"]
 docs = ["sphinx>=7.0,<8.0", "furo", "m2r2", "sphinx-jsonschema"]
 test = ["pytype", "pytest", "pytest-cov", "pillow"]
+# Required for apps using the HuggingFace transformers backend
+# (clams.backends.hf). Heavy deps; opt-in only.
+hf = ["torch", "transformers", "pillow", "tqdm"]
 
 [tool.setuptools.packages.find]
 where = ["."]
diff --git a/tests/test_backends_hf.py b/tests/test_backends_hf.py
new file mode 100644
index 0000000..fae696e
--- /dev/null
+++ b/tests/test_backends_hf.py
@@ -0,0 +1,450 @@
+"""
+Tests for :mod:`clams.backends.hf`.
+
+Exercises the device / dtype / padding-side / kwargs-passthrough
+behavior of both :func:`load_hf_model` and :func:`load_hf_pipeline`
+against mocked ``transformers`` model, processor, and pipeline
+constructors.
+
+If ``torch`` or ``transformers`` is not installed, the whole file is
+skipped (both are optional deps behind the ``[hf]`` extra).
+"""
+import unittest
+from unittest import mock
+
+import pytest
+
+pytest.importorskip('torch')
+pytest.importorskip('transformers')
+
+# Force ``transformers.pipeline`` to be eagerly resolved into the
+# package's ``__dict__``. ``transformers`` uses a lazy-loading
+# ``_LazyModule`` that fetches submodule attributes via
+# ``__getattr__`` on first access; before that, the attribute does
+# not live in ``__dict__``. The first ``mock.patch('transformers.pipeline', ...)``
+# call would then silently fail to redirect ``from transformers import pipeline``
+# inside the helper. Touching the attribute here resolves it and
+# caches it in the package dict, so subsequent ``mock.patch`` calls
+# rewrite the real entry as expected.
+import transformers  # noqa: E402
+_ = transformers.pipeline
+
+from clams.backends.hf import load_hf_model, load_hf_pipeline  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Mocks
+# ---------------------------------------------------------------------------
+
+class _MockModel:
+    """Stand-in for a ``transformers`` model class."""
+
+    # cross-test state — each test should set this to None first
+    last_from_pretrained_args = None
+    last_from_pretrained_kwargs = None
+
+    @classmethod
+    def from_pretrained(cls, model_id, **kwargs):
+        cls.last_from_pretrained_args = (model_id,)
+        cls.last_from_pretrained_kwargs = dict(kwargs)
+        return cls()
+
+    def __init__(self):
+        self.device = None
+        self.eval_called = False
+
+    def to(self, device):
+        self.device = device
+        return self
+
+    def eval(self):
+        self.eval_called = True
+        return self
+
+
+class _MockTokenizer:
+    def __init__(self):
+        self.padding_side = 'right'
+        self.pad_token = None
+        self.eos_token = '<eos>'
+
+
+class _MockProcessor:
+    """Stand-in for ``AutoProcessor`` (or similar)."""
+
+    last_from_pretrained_args = None
+    last_from_pretrained_kwargs = None
+
+    @classmethod
+    def from_pretrained(cls, model_id, **kwargs):
+        cls.last_from_pretrained_args = (model_id,)
+        cls.last_from_pretrained_kwargs = dict(kwargs)
+        return cls()
+
+    def __init__(self):
+        self.tokenizer = _MockTokenizer()
+
+
+# ---------------------------------------------------------------------------
+# Test cases
+# ---------------------------------------------------------------------------
+
+class TestDefaultsOnly(unittest.TestCase):
+    """
+    Case (a): caller passes only ``model_id`` + ``model_cls``.
+    No dtype, no padding_side, no extra kwargs.
+    """
+
+    def setUp(self):
+        _MockModel.last_from_pretrained_args = None
+        _MockModel.last_from_pretrained_kwargs = None
+        _MockProcessor.last_from_pretrained_args = None
+        _MockProcessor.last_from_pretrained_kwargs = None
+
+    def test_returns_processor_model_device_tuple(self):
+        result = load_hf_model(
+            'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+        self.assertEqual(len(result), 3)
+        processor, model, device = result
+        self.assertIsInstance(processor, _MockProcessor)
+        self.assertIsInstance(model, _MockModel)
+        self.assertIsInstance(device, str)
+        # cpu or cuda depending on host — must be one of them
+        self.assertIn(device, ('cpu', 'cuda'))
+
+    def test_no_torch_dtype_passed_when_dtype_is_none(self):
+        load_hf_model(
+            'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+        # When dtype is None, helper should NOT inject torch_dtype into
+        # model_cls.from_pretrained (let the model class use its own
+        # default).
+        kwargs = _MockModel.last_from_pretrained_kwargs
+        self.assertNotIn('torch_dtype', kwargs)
+
+    def test_padding_side_untouched_when_not_requested(self):
+        processor, _, _ = load_hf_model(
+            'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+        # Default 'right' should persist; helper should NOT have
+        # rewritten it.
+        self.assertEqual(processor.tokenizer.padding_side, 'right')
+        # pad_token should NOT have been forced to EOS.
+        self.assertIsNone(processor.tokenizer.pad_token)
+
+    def test_model_put_in_eval_mode(self):
+        _, model, _ = load_hf_model(
+            'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+        self.assertTrue(model.eval_called)
+
+
+class TestDecoderOnlyMode(unittest.TestCase):
+    """
+    Case (b): caller passes ``padding_side='left'`` (decoder-only
+    batched generation) and an explicit ``dtype``.
+    """
+
+    def setUp(self):
+        _MockModel.last_from_pretrained_args = None
+        _MockModel.last_from_pretrained_kwargs = None
+        _MockProcessor.last_from_pretrained_args = None
+        _MockProcessor.last_from_pretrained_kwargs = None
+
+    def test_padding_side_set_to_left_on_tokenizer(self):
+        processor, _, _ = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            padding_side='left',
+        )
+        self.assertEqual(processor.tokenizer.padding_side, 'left')
+
+    def test_pad_token_set_from_eos_when_unset(self):
+        processor, _, _ = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            padding_side='left',
+        )
+        self.assertEqual(
+            processor.tokenizer.pad_token,
+            processor.tokenizer.eos_token,
+        )
+
+    def test_dtype_forwarded_as_torch_dtype(self):
+        import torch
+        load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            dtype=torch.bfloat16,
+            padding_side='left',
+        )
+        self.assertEqual(
+            _MockModel.last_from_pretrained_kwargs.get('torch_dtype'),
+            torch.bfloat16,
+        )
+
+
+class TestKwargsPassThrough(unittest.TestCase):
+    """
+    Case (c): ``model_kwargs`` and ``processor_kwargs`` reach the
+    respective ``from_pretrained`` calls. Validates the SWT-style
+    pattern (use_safetensors, use_fast, add_pooling_layer, etc.).
+    """
+
+    def setUp(self):
+        _MockModel.last_from_pretrained_args = None
+        _MockModel.last_from_pretrained_kwargs = None
+        _MockProcessor.last_from_pretrained_args = None
+        _MockProcessor.last_from_pretrained_kwargs = None
+
+    def test_model_kwargs_reach_from_pretrained(self):
+        load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            model_kwargs={'use_safetensors': True,
+                          'add_pooling_layer': False},
+        )
+        kw = _MockModel.last_from_pretrained_kwargs
+        self.assertTrue(kw.get('use_safetensors'))
+        self.assertFalse(kw.get('add_pooling_layer'))
+
+    def test_processor_kwargs_reach_from_pretrained(self):
+        load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            processor_kwargs={'use_safetensors': True, 'use_fast': True},
+        )
+        kw = _MockProcessor.last_from_pretrained_kwargs
+        self.assertTrue(kw.get('use_safetensors'))
+        self.assertTrue(kw.get('use_fast'))
+
+    def test_model_id_arrives_first_positional(self):
+        load_hf_model(
+            'fake-model-id', _MockModel, processor_cls=_MockProcessor)
+        self.assertEqual(
+            _MockModel.last_from_pretrained_args, ('fake-model-id',))
+        self.assertEqual(
+            _MockProcessor.last_from_pretrained_args, ('fake-model-id',))
+
+    def test_model_and_processor_kwargs_do_not_cross_contaminate(self):
+        """SWT mixes incompatible kwargs across model and processor;
+        ensure helper doesn't blindly merge them."""
+        load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            model_kwargs={'add_pooling_layer': False},
+            processor_kwargs={'use_fast': True},
+        )
+        # add_pooling_layer is model-only; should NOT reach processor
+        self.assertNotIn(
+            'add_pooling_layer',
+            _MockProcessor.last_from_pretrained_kwargs)
+        # use_fast is processor-only; should NOT reach model
+        self.assertNotIn(
+            'use_fast',
+            _MockModel.last_from_pretrained_kwargs)
+
+
+class TestDeviceResolution(unittest.TestCase):
+    """The helper auto-detects cuda/cpu when device is None."""
+
+    def setUp(self):
+        _MockModel.last_from_pretrained_args = None
+        _MockModel.last_from_pretrained_kwargs = None
+
+    def test_explicit_device_honored(self):
+        _, model, device = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            device='cpu',
+        )
+        self.assertEqual(device, 'cpu')
+        self.assertEqual(model.device, 'cpu')
+
+
+class TestMoveToDeviceFlag(unittest.TestCase):
+    """
+    ``move_to_device=False`` skips both the ``.to(device)`` move and
+    the ``.eval()`` switch, for library-style HF wrappers that defer
+    device placement and inference-mode switching to a downstream
+    consumer.
+    """
+
+    def setUp(self):
+        _MockModel.last_from_pretrained_args = None
+        _MockModel.last_from_pretrained_kwargs = None
+
+    def test_move_skipped_when_flag_false(self):
+        _, model, _ = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            move_to_device=False,
+        )
+        # _MockModel.__init__ leaves device=None; .to() would set it.
+        self.assertIsNone(model.device)
+
+    def test_eval_skipped_when_flag_false(self):
+        _, model, _ = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            move_to_device=False,
+        )
+        self.assertFalse(model.eval_called)
+
+    def test_resolved_device_still_returned(self):
+        """Even when not moved, the resolved target is reported so the
+        downstream consumer can use it for its own ``.to(device)``."""
+        _, _, device = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            device='cpu',
+            move_to_device=False,
+        )
+        self.assertEqual(device, 'cpu')
+
+    def test_default_still_moves_and_evals(self):
+        """Regression guard: the default (omitted) value of the new
+        flag preserves prior behavior."""
+        _, model, _ = load_hf_model(
+            'fake-model-id', _MockModel,
+            processor_cls=_MockProcessor,
+            device='cpu',
+        )
+        self.assertEqual(model.device, 'cpu')
+        self.assertTrue(model.eval_called)
+
+
+# ---------------------------------------------------------------------------
+# load_hf_pipeline tests
+# ---------------------------------------------------------------------------
+
+class _FakePipeline:
+    """Captures the args/kwargs the helper forwards to
+    ``transformers.pipeline``. Behaves as the returned pipeline object
+    too -- just a tagged callable stand-in."""
+
+    last_args = None
+    last_kwargs = None
+
+    def __init__(self, *args, **kwargs):
+        type(self).last_args = args
+        type(self).last_kwargs = dict(kwargs)
+
+
+def _patch_pipeline():
+    """Patch ``transformers.pipeline`` to record its call and return a
+    ``_FakePipeline`` instance."""
+    _FakePipeline.last_args = None
+    _FakePipeline.last_kwargs = None
+    return mock.patch('transformers.pipeline', _FakePipeline)
+
+
+class TestLoadHFPipelineDefaults(unittest.TestCase):
+    """The default path: just task + model_id."""
+
+    def test_returns_pipeline_and_device(self):
+        with _patch_pipeline():
+            pipe, device = load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny')
+        self.assertIsInstance(pipe, _FakePipeline)
+        self.assertIn(device, ('cpu', 'cuda'))
+
+    def test_task_arrives_first_positional(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'token-classification', 'fake/ner-model')
+        self.assertEqual(_FakePipeline.last_args, ('token-classification',))
+
+    def test_model_id_forwarded_as_model_kwarg(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny')
+        self.assertEqual(
+            _FakePipeline.last_kwargs.get('model'), 'openai/whisper-tiny')
+
+    def test_no_revision_kwarg_when_not_specified(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny')
+        self.assertNotIn('revision', _FakePipeline.last_kwargs)
+
+
+class TestLoadHFPipelineDevice(unittest.TestCase):
+    """Device handling: auto-detect, explicit string, explicit int."""
+
+    def test_auto_detect_when_none(self):
+        with _patch_pipeline():
+            _, device = load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny')
+        self.assertIn(device, ('cpu', 'cuda'))
+        # Same value should have been passed to pipeline().
+        self.assertEqual(_FakePipeline.last_kwargs.get('device'), device)
+
+    def test_explicit_string_device_honored(self):
+        with _patch_pipeline():
+            _, device = load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                device='cpu')
+        self.assertEqual(device, 'cpu')
+        self.assertEqual(_FakePipeline.last_kwargs.get('device'), 'cpu')
+
+    def test_explicit_int_device_honored(self):
+        """``pipeline()`` natively accepts ``-1`` for CPU, ``0+`` for
+        a specific GPU index. The helper passes it through unchanged."""
+        with _patch_pipeline():
+            _, device = load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                device=-1)
+        self.assertEqual(device, -1)
+        self.assertEqual(_FakePipeline.last_kwargs.get('device'), -1)
+
+
+class TestLoadHFPipelineKwargsPassThrough(unittest.TestCase):
+    """``model_kwargs`` lands inside ``pipeline(model_kwargs={...})``;
+    ``pipeline_kwargs`` is spread directly into the pipeline call."""
+
+    def test_pipeline_kwargs_spread_into_call(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                pipeline_kwargs={
+                    'generate_kwargs': {'num_beams': 5},
+                    'batch_size': 8,
+                })
+        kw = _FakePipeline.last_kwargs
+        self.assertEqual(kw.get('generate_kwargs'), {'num_beams': 5})
+        self.assertEqual(kw.get('batch_size'), 8)
+
+    def test_model_kwargs_nested_under_model_kwargs(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                model_kwargs={'use_safetensors': True})
+        kw = _FakePipeline.last_kwargs
+        self.assertEqual(kw.get('model_kwargs'),
+                         {'use_safetensors': True})
+
+    def test_revision_forwarded(self):
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                revision='abc1234')
+        self.assertEqual(_FakePipeline.last_kwargs.get('revision'), 'abc1234')
+
+    def test_explicit_helper_args_take_precedence(self):
+        """If the caller smuggles ``model`` / ``device`` / ``revision``
+        through ``pipeline_kwargs``, the helper's own args win."""
+        with _patch_pipeline():
+            load_hf_pipeline(
+                'automatic-speech-recognition', 'openai/whisper-tiny',
+                device='cpu', revision='abc1234',
+                pipeline_kwargs={
+                    'model': 'should-be-overridden',
+                    'device': 'should-be-overridden',
+                    'revision': 'should-be-overridden',
+                })
+        kw = _FakePipeline.last_kwargs
+        self.assertEqual(kw['model'], 'openai/whisper-tiny')
+        self.assertEqual(kw['device'], 'cpu')
+        self.assertEqual(kw['revision'], 'abc1234')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_promptable.py b/tests/test_promptable.py
new file mode 100644
index 0000000..44a8fe7
--- /dev/null
+++ b/tests/test_promptable.py
@@ -0,0 +1,585 @@
+"""
+Tests for :class:`clams.app.ClamsPromptableApp`.
+
+Covers the behavior documented in
+``documentation/app-baseclasses.rst``: parameter discovery via
+``inject_promptable_parameters()``, the reservation rule on
+promptable-param names, ``build_conversation()`` shape across the
+single-turn / turn-taking / user-only modes, and the
+``response_to_grounded_textdocument()`` output contract.
+"""
+import unittest
+
+from mmif import AnnotationTypes, DocumentTypes, Mmif
+
+from clams import AppMetadata, ClamsPromptableApp
+
+
+# ---------------------------------------------------------------------------
+# Test infrastructure
+# ---------------------------------------------------------------------------
+
+def make_metadata(call_helper=True, pre_declare=None,
+                  analyzer_versions=None, hf_helper=False):
+    """
+    Build a fresh AppMetadata for tests.
+
+    :param call_helper: if True, calls
+        ``ClamsPromptableApp.inject_promptable_parameters(metadata)``
+        at the end (simulating a correctly-written ``appmetadata()``).
+        Ignored when ``hf_helper`` is True (the HF helper runs instead).
+    :param pre_declare: if set to a parameter spec dict, calls
+        ``metadata.add_parameter(**pre_declare)`` BEFORE the helper
+        runs — used to test reservation enforcement.
+    :param analyzer_versions: if set, passed through to
+        ``AppMetadata(analyzer_versions=...)``. Required when the
+        fixture is consumed by ``ClamsHFPromptableApp`` tests.
+    :param hf_helper: if True, calls
+        ``ClamsHFPromptableApp.inject_promptable_parameters(metadata)``
+        (the HF override of the plain promptable helper). Use for HF
+        fixture builds.
+    """
+    kwargs = dict(
+        name="Example Promptable App",
+        description="Test fixture, creating input TD - output TD alignment",
+        app_license="MIT",
+        identifier="https://apps.clams.ai/example-promptable/v1",
+        url="https://fakegithub.com/some/repository",
+    )
+    if analyzer_versions is not None:
+        kwargs['analyzer_versions'] = analyzer_versions
+    m = AppMetadata(**kwargs)
+    m.add_input(DocumentTypes.TextDocument)
+    m.add_output(DocumentTypes.TextDocument)
+    m.add_output(AnnotationTypes.Alignment)
+    if pre_declare is not None:
+        m.add_parameter(**pre_declare)
+    if hf_helper:
+        from clams.app import ClamsHFPromptableApp
+        ClamsHFPromptableApp.inject_promptable_parameters(m)
+    elif call_helper:
+        ClamsPromptableApp.inject_promptable_parameters(m)
+    return m
+
+
+def make_test_app(metadata):
+    """
+    Factory creating a fresh ClamsPromptableApp subclass that loads the
+    given metadata. Each call produces a fresh class so per-test state
+    doesn't leak.
+    """
+
+    def _load_appmetadata(self):
+        return metadata
+
+    cls = type(
+        'TestPromptableApp',
+        (ClamsPromptableApp,),
+        {
+            '_load_appmetadata': _load_appmetadata,
+            '_appmetadata': lambda self: None,
+            '_annotate': lambda self, mmif, **kw: mmif,
+            'generate': lambda self, prompt, **kw: [""],
+        },
+    )
+    return cls()
+
+
+# ---------------------------------------------------------------------------
+# Parameter discovery (via the helper)
+# ---------------------------------------------------------------------------
+
+class TestParameterDiscovery(unittest.TestCase):
+
+    def test_all_promptable_params_present_after_init(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        present = {p.name for p in app.metadata.parameters}
+        expected_promptable = {p['name']
+                               for p in ClamsPromptableApp.promptable_parameters}
+        self.assertTrue(expected_promptable.issubset(present))
+
+    def test_prompt_has_no_sdk_default(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        prompt_param = next(p for p in app.metadata.parameters
+                            if p.name == 'prompt')
+        self.assertIsNone(prompt_param.default)
+        self.assertTrue(prompt_param.multivalued)
+
+    def test_system_prompt_default_empty_string(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        sysprompt = next(p for p in app.metadata.parameters
+                         if p.name == 'systemPrompt')
+        self.assertEqual(sysprompt.default, '')
+
+    def test_temperature_default_is_zero(self):
+        """When the caller omits ``temperature``, it should arrive in
+        ``_annotate()`` as the float ``0.0`` (deterministic decoding)."""
+        app = make_test_app(make_metadata(call_helper=True))
+        refined = app._refine_params(prompt=['hi'])
+        self.assertEqual(refined['temperature'], 0.0)
+        self.assertIsInstance(refined['temperature'], float)
+
+    def test_prompt_mode_choices(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        pm = next(p for p in app.metadata.parameters
+                  if p.name == 'promptMode')
+        self.assertEqual(set(pm.choices), {'user-only', 'turn-taking'})
+        self.assertEqual(pm.default, 'turn-taking')
+
+
+# ---------------------------------------------------------------------------
+# Required-prompt validation
+# ---------------------------------------------------------------------------
+
+class TestRequiredPrompt(unittest.TestCase):
+
+    def test_refine_params_raises_when_prompt_missing(self):
+        """
+        ``prompt`` has no SDK default. ``_refine_params`` must raise
+        ``ValueError`` when the caller omits it.
+        """
+        app = make_test_app(make_metadata(call_helper=True))
+        with self.assertRaises(ValueError) as ctx:
+            app._refine_params()
+        self.assertIn('prompt', str(ctx.exception))
+
+
+# ---------------------------------------------------------------------------
+# Missing-helper validation in __init__
+# ---------------------------------------------------------------------------
+
+class TestMissingHelperValidation(unittest.TestCase):
+
+    def test_init_raises_when_helper_not_called(self):
+        """
+        If ``appmetadata()`` forgets to call
+        ``inject_promptable_parameters()``, ``__init__`` must raise
+        ``ValueError`` with an instructive message.
+        """
+        with self.assertRaises(ValueError) as ctx:
+            make_test_app(make_metadata(call_helper=False))
+        msg = str(ctx.exception)
+        self.assertIn('inject_promptable_parameters', msg)
+
+
+# ---------------------------------------------------------------------------
+# Reservation enforcement (via duplicate-name ValueError)
+# ---------------------------------------------------------------------------
+
+class TestReservationEnforcement(unittest.TestCase):
+
+    def test_redeclaring_prompt_trips_duplicate_name_error(self):
+        """
+        An app that calls ``metadata.add_parameter('prompt', ...)``
+        before the helper trips the existing duplicate-name
+        ``ValueError`` from ``AppMetadata.add_parameter`` (which the
+        helper's own ``add_parameter`` call raises).
+        """
+        with self.assertRaises(ValueError) as ctx:
+            make_metadata(
+                call_helper=True,
+                pre_declare={
+                    'name': 'prompt',
+                    'description': 'app-defined collision',
+                    'type': 'string',
+                    'multivalued': True,
+                },
+            )
+        self.assertIn("'prompt'", str(ctx.exception))
+
+    def test_redeclaring_max_new_tokens_trips_error(self):
+        with self.assertRaises(ValueError) as ctx:
+            make_metadata(
+                call_helper=True,
+                pre_declare={
+                    'name': 'maxNewTokens',
+                    'description': 'app-defined collision',
+                    'type': 'integer',
+                    'default': 1024,
+                },
+            )
+        self.assertIn("'maxNewTokens'", str(ctx.exception))
+
+
+# ---------------------------------------------------------------------------
+# annotate_param_caster covers promptable params (no stale-spec drift)
+# ---------------------------------------------------------------------------
+
+class TestAnnotateParamCaster(unittest.TestCase):
+
+    def test_caster_includes_promptable_param_specs(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        for spec in ClamsPromptableApp.promptable_parameters:
+            self.assertIn(spec['name'], app.annotate_param_spec)
+            stored_type, stored_multivalued = \
+                app.annotate_param_spec[spec['name']]
+            self.assertEqual(stored_type, spec['type'])
+            self.assertEqual(
+                stored_multivalued, spec.get('multivalued', False))
+
+    def test_multivalued_prompt_casts_to_list_of_strings(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        refined = app._refine_params(prompt=['hello', 'world'])
+        self.assertEqual(refined['prompt'], ['hello', 'world'])
+
+    def test_max_new_tokens_casts_to_int(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        refined = app._refine_params(prompt=['hi'], maxNewTokens=['1024'])
+        self.assertEqual(refined['maxNewTokens'], 1024)
+        self.assertIsInstance(refined['maxNewTokens'], int)
+
+    def test_temperature_casts_to_float(self):
+        app = make_test_app(make_metadata(call_helper=True))
+        refined = app._refine_params(prompt=['hi'], temperature=['0.7'])
+        self.assertEqual(refined['temperature'], 0.7)
+        self.assertIsInstance(refined['temperature'], float)
+
+
+# ---------------------------------------------------------------------------
+# build_conversation
+# ---------------------------------------------------------------------------
+
+class TestBuildConversation(unittest.TestCase):
+    """
+    Shape checks for ``ClamsPromptableApp.build_conversation()``:
+    single-turn prompts, the turn-taking and user-only modes,
+    pre-built message lists, system prompts and image payloads.
+    """
+
+    def setUp(self):
+        self.app = make_test_app(make_metadata(call_helper=True))
+
+    @classmethod
+    def _leaves(cls, node):
+        """Yield the non-dict, non-list values nested inside ``node``."""
+        if isinstance(node, dict):
+            for child in node.values():
+                yield from cls._leaves(child)
+        elif isinstance(node, list):
+            for child in node:
+                yield from cls._leaves(child)
+        else:
+            yield node
+
+    def _assert_lone_user_turn(self, conversation):
+        """Assert ``conversation`` holds one message whose role is user."""
+        self.assertEqual(len(conversation), 1)
+        self.assertEqual(conversation[0]['role'], 'user')
+
+    def test_string_prompt_single_user_turn(self):
+        self._assert_lone_user_turn(
+            self.app.build_conversation(prompt="hello"))
+
+    def test_single_element_list_single_user_turn(self):
+        self._assert_lone_user_turn(
+            self.app.build_conversation(prompt=['hello']))
+
+    def test_turn_taking_alternating_turns(self):
+        expected_roles = ('user', 'assistant', 'user')
+        turns = self.app.build_conversation(
+            prompt=['q1', 'a1', 'q2'], prompt_mode='turn-taking')
+        self.assertEqual(len(turns), len(expected_roles))
+        for turn, role in zip(turns, expected_roles):
+            self.assertEqual(turn['role'], role)
+
+    def test_user_only_returns_progressively_extending_lists(self):
+        user_turns = ['q1', 'q2', 'q3']
+        stages = self.app.build_conversation(
+            prompt=user_turns, prompt_mode='user-only')
+        # one message list per user turn
+        self.assertEqual(len(stages), 3)
+        # Only a lower bound is pinned for the final list: the test
+        # expects that assistant turns may sit between the user turns,
+        # so its exact length and content are deliberately left open.
+        self.assertGreaterEqual(len(stages[-1]), 3)
+
+    def test_pre_built_list_pass_through(self):
+        ready_made = [
+            {'role': 'system', 'content': 'You are helpful.'},
+            {'role': 'user', 'content': 'hi'},
+        ]
+        self.assertEqual(
+            self.app.build_conversation(prompt=ready_made), ready_made)
+
+    def test_system_prompt_prepended(self):
+        conv = self.app.build_conversation(
+            prompt='hello', system_prompt='You are helpful.')
+        # the system message must come first
+        self.assertEqual(conv[0]['role'], 'system')
+
+    def test_images_carried_in_user_content(self):
+        marker = object()
+        conv = self.app.build_conversation(
+            prompt='describe this', images=[marker])
+        # look at the first user turn only; its content may be nested
+        # (dicts/lists), so compare against the flattened leaf values
+        first_user = next(m for m in conv if m['role'] == 'user')
+        self.assertIn(marker, list(self._leaves(first_user['content'])))
+
+
+# ---------------------------------------------------------------------------
+# response_to_grounded_textdocument
+# ---------------------------------------------------------------------------
+
+class TestStoreResponse(unittest.TestCase):
+    """Tests for ``response_to_grounded_textdocument`` on a signed view."""
+
+    def setUp(self):
+        self.app = make_test_app(make_metadata(call_helper=True))
+        self.mmif = Mmif(validate=False)
+        self.view = self.mmif.new_view()
+        self.app.sign_view(self.view, {})
+        # declare both annotation types on the view up front
+        for kind in (DocumentTypes.TextDocument, AnnotationTypes.Alignment):
+            self.view.new_contain(kind)
+
+    def _store(self, source, response, **extra):
+        """Call the method under test against this fixture's view."""
+        return self.app.response_to_grounded_textdocument(
+            self.view, source=source, response=response, **extra)
+
+    def test_happy_path_creates_textdocument_and_alignment(self):
+        text_doc, alignment = self._store('src1', 'generated text')
+        self.assertEqual(text_doc.text_value, 'generated text')
+        self.assertEqual(alignment.get_property('source'), 'src1')
+        self.assertEqual(alignment.get_property('target'), text_doc.id)
+
+    def test_reasoning_trace_none_does_not_raise(self):
+        # passes as long as the call returns without raising
+        self._store('src1', 'text', reasoning_trace=None)
+
+    def test_reasoning_trace_not_none_raises_not_implemented(self):
+        with self.assertRaises(NotImplementedError):
+            self._store(
+                'src1', 'text', reasoning_trace='intermediate reasoning')
+
+    # TODO (krim @ 05/28/26): this test case belongs upstream in the
+    # vocabulary type definition (the `origins`/`origination` pairing
+    # is a property of the `Document` type, per clams-vocabulary#18,
+    # not a behavior of the SDK app layer). Move once clams-vocabulary
+    # supports conditional prop validation. For now, this is a sanity
+    # check that the SDK correctly forwards both kwargs through to the
+    # underlying TD.
+    def test_origins_and_origination_written_together(self):
+        text_doc, alignment = self._store(
+            'tf1', 'caption text', origins=['tp1'], origination='derived')
+        self.assertEqual(text_doc.get_property('origins'), ['tp1'])
+        self.assertEqual(text_doc.get_property('origination'), 'derived')
+        self.assertEqual(alignment.get_property('source'), 'tf1')
+        self.assertEqual(alignment.get_property('target'), text_doc.id)
+
+    def test_unpaired_origins_or_origination_raises(self):
+        # each case supplies only one half of the origins/origination pair
+        lone_kwargs = ({'origins': ['tp1']}, {'origination': 'derived'})
+        for kwargs in lone_kwargs:
+            with self.subTest(**kwargs):
+                with self.assertRaises(ValueError):
+                    self._store('src1', 'text', **kwargs)
+
+
+# ---------------------------------------------------------------------------
+# Transport-neutral parameter casting
+# ---------------------------------------------------------------------------
+
+class TestTransportNeutralCasting(unittest.TestCase):
+    """
+    Runs the ordinary ``ClamsApp`` parameter casting on a promptable app;
+    not envelope-specific: promptable apps see no separate transport layer.
+    """
+
+    def setUp(self):
+        self.app = make_test_app(make_metadata(call_helper=True))
+
+    def test_multi_element_prompt_arrives_as_list_of_strings(self):
+        prompts = self.app._refine_params(prompt=['a', 'b', 'c'])['prompt']
+        self.assertEqual(prompts, ['a', 'b', 'c'])
+        for item in prompts:
+            self.assertIsInstance(item, str)
+
+    def test_single_element_prompt_still_list(self):
+        refined = self.app._refine_params(prompt=['only'])
+        self.assertEqual(refined['prompt'], ['only'])
+
+
+# ---------------------------------------------------------------------------
+# ClamsHFPromptableApp class-attribute validation
+# ---------------------------------------------------------------------------
+
+class TestHFPromptableAppClassAttrs(unittest.TestCase):
+    """
+    Exercises the class-attribute validation in
+    :class:`ClamsHFPromptableApp.__init__`. The actual model loading
+    is patched out so these tests don't require torch/transformers.
+    End-to-end inference tests live separately.
+    """
+
+    # analyzer_versions fixtures, mapping model id -> pinned revision
+    SINGLETON_AV = {'org/fake-model': 'deadbee'}
+    MULTI_AV = {
+        'org/large-model': 'aaaaaaa',
+        'org/small-model': 'bbbbbbb',
+    }
+
+    def _make_subclass(
+            self, *, model_cls=object,
+            analyzer_versions=None, **extra_attrs):
+        """
+        Build a throwaway :class:`ClamsHFPromptableApp` subclass.
+
+        :param model_cls: value for the ``MODEL_CLS`` class attribute
+        :param analyzer_versions: mapping handed to ``make_metadata``;
+            defaults to a copy of :attr:`SINGLETON_AV`
+        :param extra_attrs: further class attributes; these win over
+            the defaults assembled here
+        :return: the new class object (not an instance)
+        """
+        if analyzer_versions is None:
+            analyzer_versions = dict(self.SINGLETON_AV)
+        attrs = {
+            '_load_appmetadata': lambda self: make_metadata(
+                hf_helper=True,
+                analyzer_versions=dict(analyzer_versions),
+            ),
+            '_appmetadata': lambda self: None,
+            '_annotate': lambda self, mmif, **kw: mmif,
+            'MODEL_CLS': model_cls,
+        }
+        attrs.update(extra_attrs)
+        from clams.app import ClamsHFPromptableApp
+        return type('TestHFApp', (ClamsHFPromptableApp,), attrs)
+
+    def _patch_load(self):
+        """
+        Swap ``clams.backends.hf.load_hf_model`` for a recording fake.
+
+        The original function is put back through ``addCleanup``, so
+        the patch is undone when the test finishes, pass or fail, and
+        callers do not need their own ``try``/``finally``.
+
+        :return: ``(restore_fn, calls_list)``; ``restore_fn`` undoes the
+            patch early (safe to call twice) and ``calls_list`` gains one
+            dict of ``model_id``, ``model_cls`` and kwargs per fake call.
+        """
+        import clams.backends.hf as hf_module
+        original = hf_module.load_hf_model
+        calls = []
+
+        def fake_load(model_id, model_cls, **kwargs):
+            calls.append({'model_id': model_id, 'model_cls': model_cls, **kwargs})
+            # processor / model / device tuple uniquely identifiable
+            return (f'PROC:{model_id}@{kwargs.get("revision")}',
+                    f'MODEL:{model_id}@{kwargs.get("revision")}',
+                    'cpu')
+
+        def restore():
+            hf_module.load_hf_model = original
+
+        hf_module.load_hf_model = fake_load
+        self.addCleanup(restore)
+        return restore, calls
+
+    def test_missing_model_cls_raises(self):
+        cls = self._make_subclass(model_cls=None)
+        with self.assertRaises(ValueError) as ctx:
+            cls()
+        self.assertIn('MODEL_CLS', str(ctx.exception))
+
+    def test_missing_analyzer_versions_raises(self):
+        # Use the plain promptable helper so promptable params are
+        # injected (parent __init__ passes) but analyzer_versions is
+        # absent and ``model`` was never injected. HF __init__ should
+        # refuse on the analyzer_versions check.
+        from clams.app import ClamsHFPromptableApp
+        cls = type('TestHFAppBad', (ClamsHFPromptableApp,), {
+            '_load_appmetadata': lambda self: make_metadata(
+                call_helper=True),  # plain promptable, no analyzer_versions
+            '_appmetadata': lambda self: None,
+            '_annotate': lambda self, mmif, **kw: mmif,
+            'MODEL_CLS': object,
+        })
+        with self.assertRaises(ValueError) as ctx:
+            cls()
+        self.assertIn('analyzer_versions', str(ctx.exception))
+
+    def test_singleton_eagerly_preloads_in_init(self):
+        _, calls = self._patch_load()
+        cls = self._make_subclass(
+            analyzer_versions=self.SINGLETON_AV,
+            DTYPE='FAKE_DTYPE',
+            PADDING_SIDE='left',
+            MODEL_KWARGS={'trust_remote_code': True},
+        )
+        app = cls()
+        # eager load on the single family member
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0]['model_id'], 'org/fake-model')
+        self.assertEqual(calls[0]['revision'], 'deadbee')
+        self.assertEqual(calls[0]['dtype'], 'FAKE_DTYPE')
+        self.assertEqual(calls[0]['padding_side'], 'left')
+        self.assertEqual(
+            calls[0]['model_kwargs'], {'trust_remote_code': True})
+        # self.processor / self.model / self.device populated
+        self.assertEqual(app.processor, 'PROC:org/fake-model@deadbee')
+        self.assertEqual(app.model, 'MODEL:org/fake-model@deadbee')
+        self.assertEqual(app.device, 'cpu')
+
+    def test_multimember_defers_loading(self):
+        _, calls = self._patch_load()
+        app = self._make_subclass(analyzer_versions=self.MULTI_AV)()
+        # no eager load for multi-member families
+        self.assertEqual(calls, [])
+        self.assertIsNone(app.processor)
+        self.assertIsNone(app.model)
+        self.assertIsNone(app.device)
+
+    def test_load_model_parses_at_revision_form_and_caches(self):
+        _, calls = self._patch_load()
+        app = self._make_subclass(analyzer_versions=self.MULTI_AV)()
+        # first call -- load via load_hf_model
+        app.load_model('org/large-model@aaaaaaa')
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0]['model_id'], 'org/large-model')
+        self.assertEqual(calls[0]['revision'], 'aaaaaaa')
+        self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa')
+        # second call same model -- cache hit, no new load
+        app.load_model('org/large-model@aaaaaaa')
+        self.assertEqual(len(calls), 1)
+        # switch to other family member -- new load
+        app.load_model('org/small-model@bbbbbbb')
+        self.assertEqual(len(calls), 2)
+        self.assertEqual(calls[1]['model_id'], 'org/small-model')
+        self.assertEqual(calls[1]['revision'], 'bbbbbbb')
+        self.assertEqual(app.processor, 'PROC:org/small-model@bbbbbbb')
+        # back to first -- still cached
+        app.load_model('org/large-model@aaaaaaa')
+        self.assertEqual(len(calls), 2)
+        self.assertEqual(app.processor, 'PROC:org/large-model@aaaaaaa')
+
+    def test_load_model_accepts_raw_form_looks_up_revision(self):
+        _, calls = self._patch_load()
+        app = self._make_subclass(analyzer_versions=self.MULTI_AV)()
+        app.load_model('org/small-model')  # no @rev suffix
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0]['model_id'], 'org/small-model')
+        self.assertEqual(calls[0]['revision'], 'bbbbbbb')
+
+    def test_refine_params_expands_modelid_to_at_revision(self):
+        self._patch_load()
+        app = self._make_subclass(analyzer_versions=self.MULTI_AV)()
+        refined = app._refine_params(
+            prompt=['hi'],
+            model=['org/large-model'],
+        )
+        self.assertEqual(refined['model'], 'org/large-model@aaaaaaa')
+
+    def test_singleton_default_lets_user_omit_modelid(self):
+        self._patch_load()
+        app = self._make_subclass(analyzer_versions=self.SINGLETON_AV)()
+        # No model in input -- SDK fills in the singleton default,
+        # then our override expands it.
+        refined = app._refine_params(prompt=['hi'])
+        self.assertEqual(refined['model'], 'org/fake-model@deadbee')
+
+
+if __name__ == '__main__':
+    unittest.main()  # run this module's tests when executed as a script